X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d6c4fc82452b6c8e7fe492a0e2a163a19477f95a..a7631a1ccb6e2a6925d00a06562e171c4ce4ea2f:/lib/config/config.default.yml?ds=sidebyside diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 24b2e450eb..80294afaf3 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -25,8 +25,15 @@ Clusters: # listening, and reachable from other hosts in the cluster. SAMPLE: InternalURLs: - "http://example.host:12345": {} - SAMPLE: {} + "http://host1.example:12345": {} + "http://host2.example:12345": + # Rendezvous is normally empty/omitted. When changing the + # URL of a Keepstore service, Rendezvous should be set to + # the old URL (with trailing slash omitted) to preserve + # rendezvous ordering. + Rendezvous: "" + SAMPLE: + Rendezvous: "" ExternalURL: "-" RailsAPI: @@ -132,9 +139,6 @@ Clusters: Workbench2: InternalURLs: {} ExternalURL: "" - Nodemanager: - InternalURLs: {} - ExternalURL: "-" Health: InternalURLs: {} ExternalURL: "-" @@ -176,6 +180,24 @@ Clusters: # parameter higher than this value, this value is used instead. MaxItemsPerResponse: 1000 + # Maximum number of concurrent requests to accept in a single + # service process, or 0 for no limit. + MaxConcurrentRequests: 0 + + # Maximum number of 64MiB memory buffers per Keepstore server process, or + # 0 for no limit. When this limit is reached, up to + # (MaxConcurrentRequests - MaxKeepBlobBuffers) HTTP requests requiring + # buffers (like GET and PUT) will wait for buffer space to be released. + # Any HTTP requests beyond MaxConcurrentRequests will receive an + # immediate 503 response. + # + # MaxKeepBlobBuffers should be set such that (MaxKeepBlobBuffers * 64MiB + # * 1.1) fits comfortably in memory. On a host dedicated to running + # Keepstore, divide total memory by 88MiB to suggest a suitable value. + # For example, if grep MemTotal /proc/meminfo reports MemTotal: 7125440 + # kB, compute 7125440 / (88 * 1024)=79 and configure MaxBuffers: 79 + MaxKeepBlobBuffers: 128 + # API methods to disable. Disabled methods are not listed in the # discovery document, and respond 404 to all requests. # Example: {"jobs.create":{}, "pipeline_instances.create": {}} @@ -259,6 +281,12 @@ Clusters: # in the directory where your API server is running. AnonymousUserToken: "" + # If a new user has an alternate email address (local@domain) + # with the domain given here, its local part becomes the new + # user's default username. Otherwise, the user's primary email + # address is used. + PreferDomainForUsername: "" + AuditLogs: # Time to keep audit logs, in seconds. (An audit log is a row added # to the "logs" table in the PostgreSQL database each time an @@ -303,43 +331,75 @@ Clusters: MaxRequestLogParamsSize: 2000 Collections: - # Allow clients to create collections by providing a manifest with - # unsigned data blob locators. IMPORTANT: This effectively disables - # access controls for data stored in Keep: a client who knows a hash - # can write a manifest that references the hash, pass it to - # collections.create (which will create a permission link), use - # collections.get to obtain a signature for that data locator, and - # use that signed locator to retrieve the data from Keep. Therefore, - # do not turn this on if your users expect to keep data private from - # one another! + + # Enable access controls for data stored in Keep. This should + # always be set to true on a production cluster. BlobSigning: true # BlobSigningKey is a string of alphanumeric characters used to # generate permission signatures for Keep locators. It must be - # identical to the permission key given to Keep. IMPORTANT: This is - # a site secret. It should be at least 50 characters. + # identical to the permission key given to Keep. IMPORTANT: This + # is a site secret. It should be at least 50 characters. # # Modifying BlobSigningKey will invalidate all existing # signatures, which can cause programs to fail (e.g., arv-put, - # arv-get, and Crunch jobs). To avoid errors, rotate keys only when - # no such processes are running. + # arv-get, and Crunch jobs). To avoid errors, rotate keys only + # when no such processes are running. BlobSigningKey: "" + # Enable garbage collection of unreferenced blobs in Keep. + BlobTrash: true + + # Time to leave unreferenced blobs in "trashed" state before + # deleting them, or 0 to skip the "trashed" state entirely and + # delete unreferenced blobs. + # + # If you use any Amazon S3 buckets as storage volumes, this + # must be at least 24h to avoid occasional data loss. + BlobTrashLifetime: 336h + + # How often to check for (and delete) trashed blocks whose + # BlobTrashLifetime has expired. + BlobTrashCheckInterval: 24h + + # Maximum number of concurrent "trash blob" and "delete trashed + # blob" operations conducted by a single keepstore process. Each + # of these can be set to 0 to disable the respective operation. + # + # If BlobTrashLifetime is zero, "trash" and "delete trash" + # happen at once, so only the lower of these two values is used. + BlobTrashConcurrency: 4 + BlobDeleteConcurrency: 4 + + # Maximum number of concurrent "create additional replica of + # existing blob" operations conducted by a single keepstore + # process. + BlobReplicateConcurrency: 4 + # Default replication level for collections. This is used when a # collection's replication_desired attribute is nil. DefaultReplication: 2 - # Lifetime (in seconds) of blob permission signatures generated by - # the API server. This determines how long a client can take (after - # retrieving a collection record) to retrieve the collection data - # from Keep. If the client needs more time than that (assuming the - # collection still has the same content and the relevant user/token - # still has permission) the client can retrieve the collection again - # to get fresh signatures. + # BlobSigningTTL determines the minimum lifetime of transient + # data, i.e., blocks that are not referenced by + # collections. Unreferenced blocks exist for two reasons: + # + # 1) A data block must be written to a disk/cloud backend device + # before a collection can be created/updated with a reference to + # it. + # + # 2) Deleting or updating a collection can remove the last + # remaining reference to a data block. # - # This must be exactly equal to the -blob-signature-ttl flag used by - # keepstore servers. Otherwise, reading data blocks and saving - # collections will fail with HTTP 403 permission errors. + # If BlobSigningTTL is too short, long-running + # processes/containers will fail when they take too long (a) + # between writing blocks and writing collections that reference + # them, or (b) between reading collections and reading the + # referenced blocks. + # + # If BlobSigningTTL is too long, data will still be stored long + # after the referring collections are deleted, and you will + # needlessly fill up disks or waste money on cloud storage. # # Modifying BlobSigningTTL invalidates existing signatures; see # BlobSigningKey note above. @@ -347,6 +407,43 @@ Clusters: # The default is 2 weeks. BlobSigningTTL: 336h + # When running keep-balance, this is the destination filename for + # the list of lost block hashes if there are any, one per line. + # Updated automically during each successful run. + BlobMissingReport: "" + + # keep-balance operates periodically, i.e.: do a + # scan/balance operation, sleep, repeat. + # + # BalancePeriod determines the interval between start times of + # successive scan/balance operations. If a scan/balance operation + # takes longer than RunPeriod, the next one will follow it + # immediately. + # + # If SIGUSR1 is received during an idle period between operations, + # the next operation will start immediately. + BalancePeriod: 10m + + # Limits the number of collections retrieved by keep-balance per + # API transaction. If this is zero, page size is + # determined by the API server's own page size limits (see + # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead). + BalanceCollectionBatch: 0 + + # The size of keep-balance's internal queue of + # collections. Higher values use more memory and improve throughput + # by allowing keep-balance to fetch the next page of collections + # while the current page is still being processed. If this is zero + # or omitted, pages are processed serially. + BalanceCollectionBuffers: 1000 + + # Maximum time for a rebalancing run. This ensures keep-balance + # eventually gives up and retries if, for example, a network + # error causes a hung connection that is never closed by the + # OS. It should be long enough that it doesn't interrupt a + # long-running balancing operation. + BalanceTimeout: 6h + # Default lifetime for ephemeral collections: 2 weeks. This must not # be less than BlobSigningTTL. DefaultTrashLifetime: 336h @@ -368,6 +465,27 @@ Clusters: # > 0s = auto-create a new version when older than the specified number of seconds. PreserveVersionIfIdle: -1s + # If non-empty, allow project and collection names to contain + # the "/" character (slash/stroke/solidus), and replace "/" with + # the given string in the filesystem hierarchy presented by + # WebDAV. Example values are "%2f" and "{slash}". Names that + # contain the substitution string itself may result in confusing + # behavior, so a value like "_" is not recommended. + # + # If the default empty value is used, the server will reject + # requests to create or rename a collection when the new name + # contains "/". + # + # If the value "/" is used, project and collection names + # containing "/" will be allowed, but they will not be + # accessible via WebDAV. + # + # Use of this feature is not recommended, if it can be avoided. + ForwardSlashNameSubstitution: "" + + # Include "folder objects" in S3 ListObjects responses. + S3FolderObjects: true + # Managed collection properties. At creation time, if the client didn't # provide the listed keys, they will be automatically populated following # one of the following behaviors: @@ -413,10 +531,187 @@ Clusters: MaxUUIDEntries: 1000 Login: - # These settings are provided by your OAuth2 provider (e.g., - # sso-provider). - ProviderAppSecret: "" - ProviderAppID: "" + # One of the following mechanisms (SSO, Google, PAM, LDAP, or + # LoginCluster) should be enabled; see + # https://doc.arvados.org/install/setup-login.html + + Google: + # Authenticate with Google. + Enable: false + + # Use the Google Cloud console to enable the People API (APIs + # and Services > Enable APIs and services > Google People API + # > Enable), generate a Client ID and secret (APIs and + # Services > Credentials > Create credentials > OAuth client + # ID > Web application) and add your controller's /login URL + # (e.g., "https://zzzzz.example.com/login") as an authorized + # redirect URL. + # + # Incompatible with ForceLegacyAPI14. ProviderAppID must be + # blank. + ClientID: "" + ClientSecret: "" + + # Allow users to log in to existing accounts using any verified + # email address listed by their Google account. If true, the + # Google People API must be enabled in order for Google login to + # work. If false, only the primary email address will be used. + AlternateEmailAddresses: true + + OpenIDConnect: + # Authenticate with an OpenID Connect provider. + Enable: false + + # Issuer URL, e.g., "https://login.example.com". + # + # This must be exactly equal to the URL returned by the issuer + # itself in its config response ("isser" key). If the + # configured value is "https://example" and the provider + # returns "https://example:443" or "https://example/" then + # login will fail, even though those URLs are equivalent + # (RFC3986). + Issuer: "" + + # Your client ID and client secret (supplied by the provider). + ClientID: "" + ClientSecret: "" + + # OpenID claim field containing the user's email + # address. Normally "email"; see + # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims + EmailClaim: "email" + + # OpenID claim field containing the email verification + # flag. Normally "email_verified". To accept every returned + # email address without checking a "verified" field at all, + # use the empty string "". + EmailVerifiedClaim: "email_verified" + + # OpenID claim field containing the user's preferred + # username. If empty, use the mailbox part of the user's email + # address. + UsernameClaim: "" + + PAM: + # (Experimental) Use PAM to authenticate users. + Enable: false + + # PAM service name. PAM will apply the policy in the + # corresponding config file (e.g., /etc/pam.d/arvados) or, if + # there is none, the default "other" config. + Service: arvados + + # Domain name (e.g., "example.com") to use to construct the + # user's email address if PAM authentication returns a + # username with no "@". If empty, use the PAM username as the + # user's email address, whether or not it contains "@". + # + # Note that the email address is used as the primary key for + # user records when logging in. Therefore, if you change + # PAMDefaultEmailDomain after the initial installation, you + # should also update existing user records to reflect the new + # domain. Otherwise, next time those users log in, they will + # be given new accounts instead of accessing their existing + # accounts. + DefaultEmailDomain: "" + + LDAP: + # Use an LDAP service to authenticate users. + Enable: false + + # Server URL, like "ldap://ldapserver.example.com:389" or + # "ldaps://ldapserver.example.com:636". + URL: "ldap://ldap:389" + + # Use StartTLS upon connecting to the server. + StartTLS: true + + # Skip TLS certificate name verification. + InsecureTLS: false + + # Strip the @domain part if a user supplies an email-style + # username with this domain. If "*", strip any user-provided + # domain. If "", never strip the domain part. Example: + # "example.com" + StripDomain: "" + + # If, after applying StripDomain, the username contains no "@" + # character, append this domain to form an email-style + # username. Example: "example.com" + AppendDomain: "" + + # The LDAP attribute to filter on when looking up a username + # (after applying StripDomain and AppendDomain). + SearchAttribute: uid + + # Bind with this username (DN or UPN) and password when + # looking up the user record. + # + # Example user: "cn=admin,dc=example,dc=com" + SearchBindUser: "" + SearchBindPassword: "" + + # Directory base for username lookup. Example: + # "ou=Users,dc=example,dc=com" + SearchBase: "" + + # Additional filters to apply when looking up users' LDAP + # entries. This can be used to restrict access to a subset of + # LDAP users, or to disambiguate users from other directory + # entries that have the SearchAttribute present. + # + # Special characters in assertion values must be escaped (see + # RFC4515). + # + # Example: "(objectClass=person)" + SearchFilters: "" + + # LDAP attribute to use as the user's email address. + # + # Important: This must not be an attribute whose value can be + # edited in the directory by the users themselves. Otherwise, + # users can take over other users' Arvados accounts trivially + # (email address is the primary key for Arvados accounts.) + EmailAttribute: mail + + # LDAP attribute to use as the preferred Arvados username. If + # no value is found (or this config is empty) the username + # originally supplied by the user will be used. + UsernameAttribute: uid + + SSO: + # Authenticate with a separate SSO server. (Deprecated) + Enable: false + + # ProviderAppID and ProviderAppSecret are generated during SSO + # setup; see + # https://doc.arvados.org/v2.0/install/install-sso.html#update-config + ProviderAppID: "" + ProviderAppSecret: "" + + Test: + # Authenticate users listed here in the config file. This + # feature is intended to be used in test environments, and + # should not be used in production. + Enable: false + Users: + SAMPLE: + Email: alice@example.com + Password: xyzzy + + # The cluster ID to delegate the user database. When set, + # logins on this cluster will be redirected to the login cluster + # (login cluster must appear in RemoteClusters with Proxy: true) + LoginCluster: "" + + # How long a cached token belonging to a remote cluster will + # remain valid before it needs to be revalidated. + RemoteTokenRefresh: 5m + + # How long a client token created from a login flow will be valid without + # asking the user to re-login. Example values: 60m, 8h. + # Default value zero means tokens don't have expiration. + TokenLifetime: 0s Git: # Path to git or gitolite-shell executable. Each authenticated @@ -491,7 +786,7 @@ Clusters: # (experimental) cloud dispatcher for executing containers on # worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n" # and ends with "\n-----END RSA PRIVATE KEY-----\n". - DispatchPrivateKey: none + DispatchPrivateKey: "" # Maximum time to wait for workers to come up before abandoning # stale locks from a previous dispatch process. @@ -523,7 +818,7 @@ Clusters: # has been reached or crunch_log_seconds_between_events has elapsed since # the last flush. LogBytesPerEvent: 4096 - LogSecondsBetweenEvents: 1 + LogSecondsBetweenEvents: 5s # The sample period for throttling logs. LogThrottlePeriod: 60s @@ -672,8 +967,24 @@ Clusters: TimeoutShutdown: 10s # Worker VM image ID. + # (aws) AMI identifier + # (azure) managed disks: the name of the managed disk image + # (azure) shared image gallery: the name of the image definition. Also + # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields. + # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g. + # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" + # An executable file (located on the dispatcher host) to be + # copied to cloud instances at runtime and used as the + # container runner/supervisor. The default value is the + # dispatcher program itself. + # + # Use the empty string to disable this step: nothing will be + # copied, and cloud instances are assumed to have a suitable + # version of crunch-run installed. + DeployRunnerBinary: "/proc/self/exe" + # Tags to add on all resources (VMs, NICs, disks) created by # the container dispatcher. (Arvados's own tags -- # InstanceType, IdleBehavior, and InstanceSecret -- will also @@ -718,13 +1029,38 @@ Clusters: # (azure) Instance configuration. CloudEnvironment: AzurePublicCloud - ResourceGroup: "" Location: centralus + + # (azure) The resource group where the VM and virtual NIC will be + # created. + ResourceGroup: "" + + # (azure) The resource group of the Network to use for the virtual + # NIC (if different from ResourceGroup) + NetworkResourceGroup: "" Network: "" Subnet: "" + + # (azure) managed disks: The resource group where the managed disk + # image can be found (if different from ResourceGroup). + ImageResourceGroup: "" + + # (azure) shared image gallery: the name of the gallery + SharedImageGalleryName: "" + # (azure) shared image gallery: the version of the image definition + SharedImageGalleryImageVersion: "" + + # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs StorageAccount: "" BlobContainer: "" + + # (azure) How long to wait before deleting VHD and NIC + # objects that are no longer being used. DeleteDanglingResourcesAfter: 20s + + # Account (that already exists in the VM image) that will be + # set up with an ssh authorized key to allow the compute + # dispatcher to connect. AdminUsername: arvados InstanceTypes: @@ -741,6 +1077,93 @@ Clusters: Price: 0.1 Preemptible: false + Volumes: + SAMPLE: + # AccessViaHosts specifies which keepstore processes can read + # and write data on the volume. + # + # For a local filesystem, AccessViaHosts has one entry, + # indicating which server the filesystem is located on. + # + # For a network-attached backend accessible by all keepstore + # servers, like a cloud storage bucket or an NFS mount, + # AccessViaHosts can be empty/omitted. + # + # Further info/examples: + # https://doc.arvados.org/install/configure-fs-storage.html + # https://doc.arvados.org/install/configure-s3-object-storage.html + # https://doc.arvados.org/install/configure-azure-blob-storage.html + AccessViaHosts: + SAMPLE: + ReadOnly: false + "http://host1.example:25107": {} + ReadOnly: false + Replication: 1 + StorageClasses: + default: true + SAMPLE: true + Driver: s3 + DriverParameters: + # for s3 driver -- see + # https://doc.arvados.org/install/configure-s3-object-storage.html + IAMRole: aaaaa + AccessKey: aaaaa + SecretKey: aaaaa + Endpoint: "" + Region: us-east-1a + Bucket: aaaaa + LocationConstraint: false + V2Signature: false + IndexPageSize: 1000 + ConnectTimeout: 1m + ReadTimeout: 10m + RaceWindow: 24h + # Use aws-s3-go (v2) instead of goamz + UseAWSS3v2Driver: false + + # For S3 driver, potentially unsafe tuning parameter, + # intentionally excluded from main documentation. + # + # Enable deletion (garbage collection) even when the + # configured BlobTrashLifetime is zero. WARNING: eventual + # consistency may result in race conditions that can cause + # data loss. Do not enable this unless you understand and + # accept the risk. + UnsafeDelete: false + + # for azure driver -- see + # https://doc.arvados.org/install/configure-azure-blob-storage.html + StorageAccountName: aaaaa + StorageAccountKey: aaaaa + StorageBaseURL: core.windows.net + ContainerName: aaaaa + RequestTimeout: 30s + ListBlobsRetryDelay: 10s + ListBlobsMaxAttempts: 10 + MaxGetBytes: 0 + WriteRaceInterval: 15s + WriteRacePollTime: 1s + + # for local directory driver -- see + # https://doc.arvados.org/install/configure-fs-storage.html + Root: /var/lib/arvados/keep-data + + # For local directory driver, potentially confusing tuning + # parameter, intentionally excluded from main documentation. + # + # When true, read and write operations (for whole 64MiB + # blocks) on an individual volume will queued and issued + # serially. When false, read and write operations will be + # issued concurrently. + # + # May possibly improve throughput if you have physical spinning disks + # and experience contention when there are multiple requests + # to the same volume. + # + # Otherwise, when using SSDs, RAID, or a shared network filesystem, you + # should leave this alone. + Serialize: false + Mail: MailchimpAPIKey: "" MailchimpListID: "" @@ -882,7 +1305,7 @@ Clusters: RunningJobLogRecordsToFetch: 2000 # In systems with many shared projects, loading of dashboard and topnav - # cab be slow due to collections indexing; use the following parameters + # can be slow due to collections indexing; use the following parameters # to suppress these properties ShowRecentCollectionsOnDashboard: true ShowUserNotifications: true @@ -891,7 +1314,7 @@ Clusters: # a link to the multi-site search page on a "home" Workbench site. # # Example: - # https://workbench.qr1hi.arvadosapi.com/collections/multisite + # https://workbench.zzzzz.arvadosapi.com/collections/multisite MultiSiteSearch: "" # Should workbench allow management of local git repositories? Set to false if @@ -909,5 +1332,69 @@ Clusters: VocabularyURL: "" FileViewersConfigURL: "" - # Use experimental controller code (see https://dev.arvados.org/issues/14287) - EnableBetaController14287: false + # Workbench welcome screen, this is HTML text that will be + # incorporated directly onto the page. + WelcomePageHTML: | + +

Please log in.

+ +

The "Log in" button below will show you a sign-in + page. After you log in, you will be redirected back to + Arvados Workbench.

+ +

If you have never used Arvados Workbench before, logging in + for the first time will automatically create a new + account.

+ + Arvados Workbench uses your name and email address only for + identification, and does not retrieve any other personal + information. + + # Workbench screen displayed to inactive users. This is HTML + # text that will be incorporated directly onto the page. + InactivePageHTML: | + +

Hi! You're logged in, but...

+

Your account is inactive.

+

An administrator must activate your account before you can get + any further.

+ + # Connecting to Arvados shell VMs tends to be site-specific. + # Put any special instructions here. This is HTML text that will + # be incorporated directly onto the Workbench page. + SSHHelpPageHTML: | + Accessing an Arvados VM with SSH (generic instructions). + Site configurations vary. Contact your local cluster administrator if you have difficulty accessing an Arvados shell node. + + # Sample text if you are using a "switchyard" ssh proxy. + # Replace "zzzzz" with your Cluster ID. + #SSHHelpPageHTML: | + #

Add a section like this to your SSH configuration file ( ~/.ssh/config):

+ #
Host *.zzzzz
+      #  TCPKeepAlive yes
+      #  ServerAliveInterval 60
+      #  ProxyCommand ssh -p2222 turnout@switchyard.zzzzz.arvadosapi.com -x -a $SSH_PROXY_FLAGS %h
+      # 
+ + # If you are using a switchyard ssh proxy, shell node hostnames + # may require a special hostname suffix. In the sample ssh + # configuration above, this would be ".zzzzz" + # This is added to the hostname in the "command line" column + # the Workbench "shell VMs" page. + # + # If your shell nodes are directly accessible by users without a + # proxy and have fully qualified host names, you should leave + # this blank. + SSHHelpHostSuffix: "" + + # Bypass new (Arvados 1.5) API implementations, and hand off + # requests directly to Rails instead. This can provide a temporary + # workaround for clients that are incompatible with the new API + # implementation. Note that it also disables some new federation + # features and will be removed in a future release. + ForceLegacyAPI14: false + +# (Experimental) Restart services automatically when config file +# changes are detected. Only supported by `arvados-server boot` in +# dev/test mode. +AutoReloadConfig: false