X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/acb1d98cdfa6e33bfd3e1bb7510b78a4fd9384e2..b9fd7e3f374248a61159e4750a84e38d1c48d5dd:/lib/config/config.default.yml diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 7fd6306185..59dabbb26d 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -25,8 +25,15 @@ Clusters: # listening, and reachable from other hosts in the cluster. SAMPLE: InternalURLs: - "http://example.host:12345": {} - SAMPLE: {} + "http://host1.example:12345": {} + "http://host2.example:12345": + # Rendezvous is normally empty/omitted. When changing the + # URL of a Keepstore service, Rendezvous should be set to + # the old URL (with trailing slash omitted) to preserve + # rendezvous ordering. + Rendezvous: "" + SAMPLE: + Rendezvous: "" ExternalURL: "-" RailsAPI: @@ -176,6 +183,15 @@ Clusters: # parameter higher than this value, this value is used instead. MaxItemsPerResponse: 1000 + # Maximum number of concurrent requests to accept in a single + # service process, or 0 for no limit. Currently supported only + # by keepstore. + MaxConcurrentRequests: 0 + + # Maximum number of 64MiB memory buffers per keepstore server + # process, or 0 for no limit. + MaxKeepBlobBuffers: 128 + # API methods to disable. Disabled methods are not listed in the # discovery document, and respond 404 to all requests. # Example: {"jobs.create":{}, "pipeline_instances.create": {}} @@ -206,6 +222,9 @@ Clusters: WebsocketClientEventQueue: 64 WebsocketServerEventQueue: 4 + # Timeout on requests to internal Keep services. + KeepServiceRequestTimeout: 15s + Users: # Config parameters to automatically setup new users. If enabled, # this users will be able to self-activate. Enable this if you want @@ -251,16 +270,17 @@ Clusters: NewUserNotificationRecipients: {} NewInactiveUserNotificationRecipients: {} - # Set anonymous_user_token to enable anonymous user access. 
You can get - # the token by running "bundle exec ./script/get_anonymous_user_token.rb" - # in the directory where your API server is running. - AnonymousUserToken: "" - # Set AnonymousUserToken to enable anonymous user access. You can get # the token by running "bundle exec ./script/get_anonymous_user_token.rb" # in the directory where your API server is running. AnonymousUserToken: "" + # If a new user has an alternate email address (local@domain) + # with the domain given here, its local part becomes the new + # user's default username. Otherwise, the user's primary email + # address is used. + PreferDomainForUsername: "" + AuditLogs: # Time to keep audit logs, in seconds. (An audit log is a row added # to the "logs" table in the PostgreSQL database each time an @@ -305,43 +325,75 @@ Clusters: MaxRequestLogParamsSize: 2000 Collections: - # Allow clients to create collections by providing a manifest with - # unsigned data blob locators. IMPORTANT: This effectively disables - # access controls for data stored in Keep: a client who knows a hash - # can write a manifest that references the hash, pass it to - # collections.create (which will create a permission link), use - # collections.get to obtain a signature for that data locator, and - # use that signed locator to retrieve the data from Keep. Therefore, - # do not turn this on if your users expect to keep data private from - # one another! + + # Enable access controls for data stored in Keep. This should + # always be set to true on a production cluster. BlobSigning: true # BlobSigningKey is a string of alphanumeric characters used to # generate permission signatures for Keep locators. It must be - # identical to the permission key given to Keep. IMPORTANT: This is - # a site secret. It should be at least 50 characters. + # identical to the permission key given to Keep. IMPORTANT: This + # is a site secret. It should be at least 50 characters. 
# # Modifying BlobSigningKey will invalidate all existing # signatures, which can cause programs to fail (e.g., arv-put, - # arv-get, and Crunch jobs). To avoid errors, rotate keys only when - # no such processes are running. + # arv-get, and Crunch jobs). To avoid errors, rotate keys only + # when no such processes are running. BlobSigningKey: "" + # Enable garbage collection of unreferenced blobs in Keep. + BlobTrash: true + + # Time to leave unreferenced blobs in "trashed" state before + # deleting them, or 0 to skip the "trashed" state entirely and + # delete unreferenced blobs. + # + # If you use any Amazon S3 buckets as storage volumes, this + # must be at least 24h to avoid occasional data loss. + BlobTrashLifetime: 336h + + # How often to check for (and delete) trashed blocks whose + # BlobTrashLifetime has expired. + BlobTrashCheckInterval: 24h + + # Maximum number of concurrent "trash blob" and "delete trashed + # blob" operations conducted by a single keepstore process. Each + # of these can be set to 0 to disable the respective operation. + # + # If BlobTrashLifetime is zero, "trash" and "delete trash" + # happen at once, so only the lower of these two values is used. + BlobTrashConcurrency: 4 + BlobDeleteConcurrency: 4 + + # Maximum number of concurrent "create additional replica of + # existing blob" operations conducted by a single keepstore + # process. + BlobReplicateConcurrency: 4 + # Default replication level for collections. This is used when a # collection's replication_desired attribute is nil. DefaultReplication: 2 - # Lifetime (in seconds) of blob permission signatures generated by - # the API server. This determines how long a client can take (after - # retrieving a collection record) to retrieve the collection data - # from Keep. 
If the client needs more time than that (assuming the - # collection still has the same content and the relevant user/token - # still has permission) the client can retrieve the collection again - # to get fresh signatures. + # BlobSigningTTL determines the minimum lifetime of transient + # data, i.e., blocks that are not referenced by + # collections. Unreferenced blocks exist for two reasons: + # + # 1) A data block must be written to a disk/cloud backend device + # before a collection can be created/updated with a reference to + # it. + # + # 2) Deleting or updating a collection can remove the last + # remaining reference to a data block. # - # This must be exactly equal to the -blob-signature-ttl flag used by - # keepstore servers. Otherwise, reading data blocks and saving - # collections will fail with HTTP 403 permission errors. + # If BlobSigningTTL is too short, long-running + # processes/containers will fail when they take too long (a) + # between writing blocks and writing collections that reference + # them, or (b) between reading collections and reading the + # referenced blocks. + # + # If BlobSigningTTL is too long, data will still be stored long + # after the referring collections are deleted, and you will + # needlessly fill up disks or waste money on cloud storage. # # Modifying BlobSigningTTL invalidates existing signatures; see # BlobSigningKey note above. @@ -349,6 +401,36 @@ Clusters: # The default is 2 weeks. BlobSigningTTL: 336h + # When running keep-balance, this is the destination filename for + # the list of lost block hashes if there are any, one per line. + # Updated automatically during each successful run. + BlobMissingReport: "" + + # keep-balance operates periodically, i.e.: do a + # scan/balance operation, sleep, repeat. + # + # BalancePeriod determines the interval between start times of + # successive scan/balance operations. If a scan/balance operation + # takes longer than BalancePeriod, the next one will follow it + # immediately.
+ # + # If SIGUSR1 is received during an idle period between operations, + # the next operation will start immediately. + BalancePeriod: 10m + + # Limits the number of collections retrieved by keep-balance per + # API transaction. If this is zero, page size is + # determined by the API server's own page size limits (see + # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead). + BalanceCollectionBatch: 0 + + # The size of keep-balance's internal queue of + # collections. Higher values use more memory and improve throughput + # by allowing keep-balance to fetch the next page of collections + # while the current page is still being processed. If this is zero + # or omitted, pages are processed serially. + BalanceCollectionBuffers: 1000 + # Default lifetime for ephemeral collections: 2 weeks. This must not # be less than BlobSigningTTL. DefaultTrashLifetime: 336h @@ -370,6 +452,24 @@ Clusters: # > 0s = auto-create a new version when older than the specified number of seconds. PreserveVersionIfIdle: -1s + # If non-empty, allow project and collection names to contain + # the "/" character (slash/stroke/solidus), and replace "/" with + # the given string in the filesystem hierarchy presented by + # WebDAV. Example values are "%2f" and "{slash}". Names that + # contain the substitution string itself may result in confusing + # behavior, so a value like "_" is not recommended. + # + # If the default empty value is used, the server will reject + # requests to create or rename a collection when the new name + # contains "/". + # + # If the value "/" is used, project and collection names + # containing "/" will be allowed, but they will not be + # accessible via WebDAV. + # + # Use of this feature is not recommended, if it can be avoided. + ForwardSlashNameSubstitution: "" + # Managed collection properties. 
At creation time, if the client didn't # provide the listed keys, they will be automatically populated following # one of the following behaviors: @@ -400,6 +500,7 @@ Clusters: # Cache parameters for WebDAV content serving: # * TTL: Maximum time to cache manifests and permission checks. # * UUIDTTL: Maximum time to cache collection state. + # * MaxBlockEntries: Maximum number of block cache entries. # * MaxCollectionEntries: Maximum number of collection cache entries. # * MaxCollectionBytes: Approximate memory limit for collection cache. # * MaxPermissionEntries: Maximum number of permission cache entries. @@ -407,18 +508,60 @@ Clusters: WebDAVCache: TTL: 300s UUIDTTL: 5s + MaxBlockEntries: 4 MaxCollectionEntries: 1000 MaxCollectionBytes: 100000000 MaxPermissionEntries: 1000 MaxUUIDEntries: 1000 Login: - # These settings are provided by your OAuth2 provider (e.g., - # sso-provider). - ProviderAppSecret: "" + # These settings are provided by your OAuth2 provider (eg + # Google) used to perform upstream authentication. ProviderAppID: "" + ProviderAppSecret: "" + + # (Experimental) Authenticate with Google, bypassing the + # SSO-provider gateway service. Use the Google Cloud console to + # enable the People API (APIs and Services > Enable APIs and + # services > Google People API > Enable), generate a Client ID + # and secret (APIs and Services > Credentials > Create + # credentials > OAuth client ID > Web application) and add your + # controller's /login URL (e.g., + # "https://zzzzz.example.com/login") as an authorized redirect + # URL. + # + # Incompatible with ForceLegacyAPI14. ProviderAppID must be + # blank. + GoogleClientID: "" + GoogleClientSecret: "" + + # Allow users to log in to existing accounts using any verified + # email address listed by their Google account. If true, the + # Google People API must be enabled in order for Google login to + # work. If false, only the primary email address will be used. 
+ GoogleAlternateEmailAddresses: true + + # The cluster ID to delegate the user database. When set, + # logins on this cluster will be redirected to the login cluster + # (login cluster must appear in RemoteClusters with Proxy: true) + LoginCluster: "" + + # How long a cached token belonging to a remote cluster will + # remain valid before it needs to be revalidated. + RemoteTokenRefresh: 5m Git: + # Path to git or gitolite-shell executable. Each authenticated + # request will execute this program with the single argument "http-backend" + GitCommand: /usr/bin/git + + # Path to Gitolite's home directory. If a non-empty path is given, + # the CGI environment will be set up to support the use of + # gitolite-shell as a GitCommand: for example, if GitoliteHome is + # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh, + # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1. + GitoliteHome: "" + # Git repositories must be readable by api server, or you won't be # able to submit crunch jobs. To pass the test suites, put a clone # of the arvados tree in {git_repositories_dir}/arvados.git or @@ -480,7 +623,7 @@ Clusters: # (experimental) cloud dispatcher for executing containers on # worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n" # and ends with "\n-----END RSA PRIVATE KEY-----\n". - DispatchPrivateKey: none + DispatchPrivateKey: "" # Maximum time to wait for workers to come up before abandoning # stale locks from a previous dispatch process. @@ -512,7 +655,7 @@ Clusters: # has been reached or crunch_log_seconds_between_events has elapsed since # the last flush. LogBytesPerEvent: 4096 - LogSecondsBetweenEvents: 1 + LogSecondsBetweenEvents: 5s # The sample period for throttling logs. LogThrottlePeriod: 60s @@ -590,7 +733,11 @@ Clusters: AssignNodeHostname: "compute%d" JobsAPI: - # Enable the legacy Jobs API. This value must be a string. + # Enable the legacy 'jobs' API (crunch v1). This value must be a string. 
+ # + # Note: this only enables read-only access, creating new + # legacy jobs and pipelines is not supported. + # # 'auto' -- (default) enable the Jobs API only if it has been used before # (i.e., there are job records in the database) # 'true' -- enable the Jobs API despite lack of existing records. @@ -603,30 +750,6 @@ Clusters: # {git_repositories_dir}/arvados/.git GitInternalDir: /var/lib/arvados/internal.git - # Docker image to be used when none found in runtime_constraints of a job - DefaultDockerImage: "" - - # none or slurm_immediate - CrunchJobWrapper: none - - # username, or false = do not set uid when running jobs. - CrunchJobUser: crunch - - # The web service must be able to create/write this file, and - # crunch-job must be able to stat() it. - CrunchRefreshTrigger: /tmp/crunch_refresh_trigger - - # Control job reuse behavior when two completed jobs match the - # search criteria and have different outputs. - # - # If true, in case of a conflict, reuse the earliest job (this is - # similar to container reuse behavior). - # - # If false, in case of a conflict, do not reuse any completed job, - # but do reuse an already-running job if available (this is the - # original job reuse behavior, and is still the default). - ReuseJobIfOutputsDiffer: false - CloudVMs: # Enable the cloud scheduler (experimental). Enable: false @@ -683,6 +806,16 @@ Clusters: # Worker VM image ID. ImageID: "" + # An executable file (located on the dispatcher host) to be + # copied to cloud instances at runtime and used as the + # container runner/supervisor. The default value is the + # dispatcher program itself. + # + # Use the empty string to disable this step: nothing will be + # copied, and cloud instances are assumed to have a suitable + # version of crunch-run installed. + DeployRunnerBinary: "/proc/self/exe" + # Tags to add on all resources (VMs, NICs, disks) created by # the container dispatcher. 
(Arvados's own tags -- # InstanceType, IdleBehavior, and InstanceSecret -- will also @@ -750,6 +883,90 @@ Clusters: Price: 0.1 Preemptible: false + Volumes: + SAMPLE: + # AccessViaHosts specifies which keepstore processes can read + # and write data on the volume. + # + # For a local filesystem, AccessViaHosts has one entry, + # indicating which server the filesystem is located on. + # + # For a network-attached backend accessible by all keepstore + # servers, like a cloud storage bucket or an NFS mount, + # AccessViaHosts can be empty/omitted. + # + # Further info/examples: + # https://doc.arvados.org/install/configure-fs-storage.html + # https://doc.arvados.org/install/configure-s3-object-storage.html + # https://doc.arvados.org/install/configure-azure-blob-storage.html + AccessViaHosts: + SAMPLE: + ReadOnly: false + "http://host1.example:25107": {} + ReadOnly: false + Replication: 1 + StorageClasses: + default: true + SAMPLE: true + Driver: s3 + DriverParameters: + # for s3 driver -- see + # https://doc.arvados.org/install/configure-s3-object-storage.html + IAMRole: aaaaa + AccessKey: aaaaa + SecretKey: aaaaa + Endpoint: "" + Region: us-east-1a + Bucket: aaaaa + LocationConstraint: false + IndexPageSize: 1000 + ConnectTimeout: 1m + ReadTimeout: 10m + RaceWindow: 24h + + # For S3 driver, potentially unsafe tuning parameter, + # intentionally excluded from main documentation. + # + # Enable deletion (garbage collection) even when the + # configured BlobTrashLifetime is zero. WARNING: eventual + # consistency may result in race conditions that can cause + # data loss. Do not enable this unless you understand and + # accept the risk. 
+ UnsafeDelete: false + + # for azure driver -- see + # https://doc.arvados.org/install/configure-azure-blob-storage.html + StorageAccountName: aaaaa + StorageAccountKey: aaaaa + StorageBaseURL: core.windows.net + ContainerName: aaaaa + RequestTimeout: 30s + ListBlobsRetryDelay: 10s + ListBlobsMaxAttempts: 10 + MaxGetBytes: 0 + WriteRaceInterval: 15s + WriteRacePollTime: 1s + + # for local directory driver -- see + # https://doc.arvados.org/install/configure-fs-storage.html + Root: /var/lib/arvados/keep-data + + # For local directory driver, potentially confusing tuning + # parameter, intentionally excluded from main documentation. + # + # When true, read and write operations (for whole 64MiB + # blocks) on an individual volume will be queued and issued + # serially. When false, read and write operations will be + # issued concurrently. + # + # May possibly improve throughput if you have physical spinning disks + # and experience contention when there are multiple requests + # to the same volume. + # + # Otherwise, when using SSDs, RAID, or a shared network filesystem, you + # should leave this alone. + Serialize: false + Mail: MailchimpAPIKey: "" MailchimpListID: "" @@ -918,5 +1135,43 @@ Clusters: VocabularyURL: "" FileViewersConfigURL: "" - # Use experimental controller code (see https://dev.arvados.org/issues/14287) - EnableBetaController14287: false + # Workbench welcome screen, this is HTML text that will be + # incorporated directly onto the page. + WelcomePageHTML: | + +

Please log in.

+ +

The "Log in" button below will show you a sign-in + page. After you log in, you will be redirected back to + Arvados Workbench.

+ +

If you have never used Arvados Workbench before, logging in + for the first time will automatically create a new + account.

+ + Arvados Workbench uses your name and email address only for + identification, and does not retrieve any other personal + information. + + # Workbench screen displayed to inactive users. This is HTML + # text that will be incorporated directly onto the page. + InactivePageHTML: | + +

Hi! You're logged in, but...

+

Your account is inactive.

+

An administrator must activate your account before you can get + any further.

+ + # Connecting to Arvados shell VMs tends to be site-specific. + # Put any special instructions here. This is HTML text that will + # be incorporated directly onto the Workbench page. + SSHHelpPageHTML: | + Accessing an Arvados VM with SSH (generic instructions). + Site configurations vary. Contact your local cluster administrator if you have difficulty accessing an Arvados shell node. + + # Bypass new (Arvados 1.5) API implementations, and hand off + # requests directly to Rails instead. This can provide a temporary + # workaround for clients that are incompatible with the new API + # implementation. Note that it also disables some new federation + # features and will be removed in a future release. + ForceLegacyAPI14: false