X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/590c491796ece16caa4251dd443412f323492fea..fd38b59aac9b4453cf04fb7d6e1b8ad51549d6c3:/lib/config/generated_config.go

diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 4e7790603a..d21bb2d284 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -31,8 +31,15 @@ Clusters:
       # listening, and reachable from other hosts in the cluster.
       SAMPLE:
         InternalURLs:
-          "http://example.host:12345": {}
-          SAMPLE: {}
+          "http://host1.example:12345": {}
+          "http://host2.example:12345":
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
+          SAMPLE:
+            Rendezvous: ""
         ExternalURL: "-"
 
       RailsAPI:
@@ -182,6 +189,15 @@ Clusters:
       # parameter higher than this value, this value is used instead.
       MaxItemsPerResponse: 1000
 
+      # Maximum number of concurrent requests to accept in a single
+      # service process, or 0 for no limit. Currently supported only
+      # by keepstore.
+      MaxConcurrentRequests: 0
+
+      # Maximum number of 64MiB memory buffers per keepstore server
+      # process, or 0 for no limit.
+      MaxKeepBlobBuffers: 128
+
       # API methods to disable. Disabled methods are not listed in the
       # discovery document, and respond 404 to all requests.
       # Example: {"jobs.create":{}, "pipeline_instances.create": {}}
@@ -212,14 +228,17 @@ Clusters:
       WebsocketClientEventQueue: 64
       WebsocketServerEventQueue: 4
 
+      # Timeout on requests to internal Keep services.
+      KeepServiceRequestTimeout: 15s
+
     Users:
       # Config parameters to automatically set up new users. If enabled,
       # these users will be able to self-activate. Enable this if you want
       # to run an open instance where anyone can create an account and use
       # the system without requiring manual approval.
       #
-      # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-      # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+      # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+      # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
       AutoSetupNewUsers: false
       AutoSetupNewUsersWithVmUUID: ""
       AutoSetupNewUsersWithRepository: false
@@ -232,7 +251,7 @@ Clusters:
         syslog: {}
         SAMPLE: {}
 
-      # When new_users_are_active is set to true, new users will be active
+      # When NewUsersAreActive is set to true, new users will be active
       # immediately. This skips the "self-activate" step which enforces
       # user agreements. Should only be enabled for development.
       NewUsersAreActive: false
@@ -244,7 +263,7 @@ Clusters:
       # should be an address associated with a Google account.
       AutoAdminUserWithEmail: ""
 
-      # If auto_admin_first_user is set to true, the first user to log in when no
+      # If AutoAdminFirstUser is set to true, the first user to log in when no
       # other admin users exist will automatically become an admin user.
       AutoAdminFirstUser: false
 
@@ -257,7 +276,7 @@ Clusters:
       NewUserNotificationRecipients: {}
       NewInactiveUserNotificationRecipients: {}
 
-      # Set anonymous_user_token to enable anonymous user access. You can get
+      # Set AnonymousUserToken to enable anonymous user access. You can get
       # the token by running "bundle exec ./script/get_anonymous_user_token.rb"
       # in the directory where your API server is running.
       AnonymousUserToken: ""
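For example, an open development cluster that auto-activates everyone who signs up could combine the Users settings above as follows. This is a minimal sketch, not a recommended production setup; the cluster ID zzzzz and the VM UUID are placeholders:

    Clusters:
      zzzzz:
        Users:
          AutoSetupNewUsers: true
          AutoSetupNewUsersWithVmUUID: "zzzzz-2x53u-0123456789abcde"
          AutoSetupNewUsersWithRepository: true
          NewUsersAreActive: true
          AutoAdminFirstUser: true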
@@ -273,7 +292,7 @@ Clusters:
 
       # Maximum number of log rows to delete in a single SQL transaction.
       #
-      # If max_audit_log_delete_batch is 0, log entries will never be
+      # If MaxDeleteBatch is 0, log entries will never be
       # deleted by Arvados. Cleanup can be done by an external process
       # without affecting any Arvados system processes, as long as very
       # recent (<5 minutes old) logs are not deleted.
@@ -306,52 +325,114 @@ Clusters:
       MaxRequestLogParamsSize: 2000
 
     Collections:
-      # Allow clients to create collections by providing a manifest with
-      # unsigned data blob locators. IMPORTANT: This effectively disables
-      # access controls for data stored in Keep: a client who knows a hash
-      # can write a manifest that references the hash, pass it to
-      # collections.create (which will create a permission link), use
-      # collections.get to obtain a signature for that data locator, and
-      # use that signed locator to retrieve the data from Keep. Therefore,
-      # do not turn this on if your users expect to keep data private from
-      # one another!
+
+      # Enable access controls for data stored in Keep. This should
+      # always be set to true on a production cluster.
       BlobSigning: true
 
       # BlobSigningKey is a string of alphanumeric characters used to
       # generate permission signatures for Keep locators. It must be
-      # identical to the permission key given to Keep. IMPORTANT: This is
-      # a site secret. It should be at least 50 characters.
+      # identical to the permission key given to Keep. IMPORTANT: This
+      # is a site secret. It should be at least 50 characters.
       #
-      # Modifying blob_signing_key will invalidate all existing
+      # Modifying BlobSigningKey will invalidate all existing
       # signatures, which can cause programs to fail (e.g., arv-put,
-      # arv-get, and Crunch jobs). To avoid errors, rotate keys only when
-      # no such processes are running.
+      # arv-get, and Crunch jobs). To avoid errors, rotate keys only
+      # when no such processes are running.
       BlobSigningKey: ""
 
+      # Enable garbage collection of unreferenced blobs in Keep.
+      BlobTrash: true
+
+      # Time to leave unreferenced blobs in "trashed" state before
+      # deleting them, or 0 to skip the "trashed" state entirely and
+      # delete unreferenced blobs.
+      #
+      # If you use any Amazon S3 buckets as storage volumes, this
+      # must be at least 24h to avoid occasional data loss.
+      BlobTrashLifetime: 336h
+
+      # How often to check for (and delete) trashed blocks whose
+      # BlobTrashLifetime has expired.
+      BlobTrashCheckInterval: 24h
+
+      # Maximum number of concurrent "trash blob" and "delete trashed
+      # blob" operations conducted by a single keepstore process. Each
+      # of these can be set to 0 to disable the respective operation.
+      #
+      # If BlobTrashLifetime is zero, "trash" and "delete trash"
+      # happen at once, so only the lower of these two values is used.
+      BlobTrashConcurrency: 4
+      BlobDeleteConcurrency: 4
+
+      # Maximum number of concurrent "create additional replica of
+      # existing blob" operations conducted by a single keepstore
+      # process.
+      BlobReplicateConcurrency: 4
+
       # Default replication level for collections. This is used when a
       # collection's replication_desired attribute is nil.
       DefaultReplication: 2
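To show how the blob lifecycle settings above fit together, here is a minimal Collections sketch for a cluster with S3-backed volumes; the signing key and lifetimes are illustrative placeholders, not recommendations:

    Clusters:
      zzzzz:
        Collections:
          BlobSigning: true
          BlobSigningKey: "fiftyormorerandomalphanumericcharactersgohere000000"
          BlobTrash: true
          BlobTrashLifetime: 24h      # minimum safe value when any volume is S3-backed
          BlobTrashCheckInterval: 12h
          BlobTrashConcurrency: 4
          BlobDeleteConcurrency: 4
          DefaultReplication: 2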
-      # Lifetime (in seconds) of blob permission signatures generated by
-      # the API server. This determines how long a client can take (after
-      # retrieving a collection record) to retrieve the collection data
-      # from Keep. If the client needs more time than that (assuming the
-      # collection still has the same content and the relevant user/token
-      # still has permission) the client can retrieve the collection again
-      # to get fresh signatures.
+      # BlobSigningTTL determines the minimum lifetime of transient
+      # data, i.e., blocks that are not referenced by
+      # collections. Unreferenced blocks exist for two reasons:
+      #
+      # 1) A data block must be written to a disk/cloud backend device
+      # before a collection can be created/updated with a reference to
+      # it.
+      #
+      # 2) Deleting or updating a collection can remove the last
+      # remaining reference to a data block.
       #
-      # This must be exactly equal to the -blob-signature-ttl flag used by
-      # keepstore servers. Otherwise, reading data blocks and saving
-      # collections will fail with HTTP 403 permission errors.
+      # If BlobSigningTTL is too short, long-running
+      # processes/containers will fail when they take too long (a)
+      # between writing blocks and writing collections that reference
+      # them, or (b) between reading collections and reading the
+      # referenced blocks.
       #
-      # Modifying blob_signature_ttl invalidates existing signatures; see
-      # blob_signing_key note above.
+      # If BlobSigningTTL is too long, data will still be stored long
+      # after the referring collections are deleted, and you will
+      # needlessly fill up disks or waste money on cloud storage.
+      #
+      # Modifying BlobSigningTTL invalidates existing signatures; see
+      # BlobSigningKey note above.
       #
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for
+      # the list of lost block hashes if there are any, one per line.
+      # Updated atomically during each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically: do a scan/balance
+      # operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 0
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
-      # be less than blob_signature_ttl.
+      # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
 
       # Interval (seconds) between trash sweeps. During a trash sweep,
@@ -361,7 +442,7 @@ Clusters:
 
       # If true, enable collection versioning.
       # When a collection's preserve_version field is true or the current version
-      # is older than the amount of seconds defined on preserve_version_if_idle,
+      # is older than the number of seconds defined in PreserveVersionIfIdle,
       # a snapshot of the collection's previous state is created and linked to
       # the current collection.
       CollectionVersioning: false
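As a concrete illustration of the keep-balance settings above, a cluster that rebalances every six hours, logs lost blocks, and relies on the API server's default page size might use something like the following; the report path and period are assumptions for illustration:

    Clusters:
      zzzzz:
        Collections:
          BlobMissingReport: /var/log/arvados/lost-blocks.txt
          BalancePeriod: 6h
          BalanceCollectionBatch: 0
          BalanceCollectionBuffers: 1000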
@@ -398,13 +479,50 @@ Clusters:
       # The default setting (false) is appropriate for a multi-user site.
       TrustAllContent: false
 
+      # Cache parameters for WebDAV content serving:
+      # * TTL: Maximum time to cache manifests and permission checks.
+      # * UUIDTTL: Maximum time to cache collection state.
+      # * MaxBlockEntries: Maximum number of block cache entries.
+      # * MaxCollectionEntries: Maximum number of collection cache entries.
+      # * MaxCollectionBytes: Approximate memory limit for collection cache.
+      # * MaxPermissionEntries: Maximum number of permission cache entries.
+      # * MaxUUIDEntries: Maximum number of UUID cache entries.
+      WebDAVCache:
+        TTL: 300s
+        UUIDTTL: 5s
+        MaxBlockEntries: 4
+        MaxCollectionEntries: 1000
+        MaxCollectionBytes: 100000000
+        MaxPermissionEntries: 1000
+        MaxUUIDEntries: 1000
+
     Login:
-      # These settings are provided by your OAuth2 provider (e.g.,
-      # sso-provider).
+      # These settings are provided by your OAuth2 provider (e.g.,
+      # Google), used to perform upstream authentication.
       ProviderAppSecret: ""
       ProviderAppID: ""
 
+      # The cluster ID of the cluster to delegate the user database to.
+      # When set, logins on this cluster will be redirected to the login
+      # cluster (the login cluster must appear in RemoteHosts with Proxy: true).
+      LoginCluster: ""
+
+      # How long a cached token belonging to a remote cluster will
+      # remain valid before it needs to be revalidated.
+      RemoteTokenRefresh: 5m
+
     Git:
+      # Path to the git or gitolite-shell executable. Each authenticated
+      # request will execute this program with the single argument "http-backend".
+      GitCommand: /usr/bin/git
+
+      # Path to Gitolite's home directory. If a non-empty path is given,
+      # the CGI environment will be set up to support the use of
+      # gitolite-shell as a GitCommand: for example, if GitoliteHome is
+      # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
+      # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
+      GitoliteHome: ""
+
       # Git repositories must be readable by the API server, or you won't be
       # able to submit crunch jobs. To pass the test suites, put a clone
       # of the arvados tree in {git_repositories_dir}/arvados.git or
@@ -531,9 +649,8 @@ Clusters:
       SLURM:
         PrioritySpread: 0
         SbatchArgumentsList: []
-        KeepServices:
-          SAMPLE:
-            InternalURLs: {}
+        SbatchEnvironmentVariables:
+          SAMPLE: ""
         Managed:
           # Path to dns server configuration directory
           # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
@@ -577,7 +694,11 @@ Clusters:
           AssignNodeHostname: "compute%d"
 
       JobsAPI:
-        # Enable the legacy Jobs API. This value must be a string.
+        # Enable the legacy 'jobs' API (crunch v1). This value must be a string.
+        #
+        # Note: this only enables read-only access; creating new
+        # legacy jobs and pipelines is not supported.
+        #
         # 'auto' -- (default) enable the Jobs API only if it has been used before
         #           (i.e., there are job records in the database)
         # 'true' -- enable the Jobs API despite lack of existing records.
@@ -590,30 +711,6 @@ Clusters:
         #   {git_repositories_dir}/arvados/.git
         GitInternalDir: /var/lib/arvados/internal.git
 
-        # Docker image to be used when none found in runtime_constraints of a job
-        DefaultDockerImage: ""
-
-        # none or slurm_immediate
-        CrunchJobWrapper: none
-
-        # username, or false = do not set uid when running jobs.
-        CrunchJobUser: crunch
-
-        # The web service must be able to create/write this file, and
-        # crunch-job must be able to stat() it.
-        CrunchRefreshTrigger: /tmp/crunch_refresh_trigger
-
-        # Control job reuse behavior when two completed jobs match the
-        # search criteria and have different outputs.
-        #
-        # If true, in case of a conflict, reuse the earliest job (this is
-        # similar to container reuse behavior).
-        #
-        # If false, in case of a conflict, do not reuse any completed job,
-        # but do reuse an already-running job if available (this is the
-        # original job reuse behavior, and is still the default).
-        ReuseJobIfOutputsDiffer: false
-
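The SbatchEnvironmentVariables map added above supplies extra environment variables for crunch-dispatch-slurm to set when it invokes sbatch. A minimal sketch, assuming SLURM's configuration lives in a non-default location (the path is an assumption for illustration):

    Clusters:
      zzzzz:
        Containers:
          SLURM:
            SbatchEnvironmentVariables:
              SLURM_CONF: /etc/slurm-llnl/slurm.conf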
       CloudVMs:
         # Enable the cloud scheduler (experimental).
         Enable: false
@@ -737,6 +834,67 @@ Clusters:
             Price: 0.1
             Preemptible: false
 
+      Volumes:
+        SAMPLE:
+          # AccessViaHosts specifies which keepstore processes can read
+          # and write data on the volume.
+          #
+          # For a local filesystem, AccessViaHosts has one entry,
+          # indicating which server the filesystem is located on.
+          #
+          # For a network-attached backend accessible by all keepstore
+          # servers, like a cloud storage bucket or an NFS mount,
+          # AccessViaHosts can be empty/omitted.
+          #
+          # Further info/examples:
+          # https://doc.arvados.org/install/configure-fs-storage.html
+          # https://doc.arvados.org/install/configure-s3-object-storage.html
+          # https://doc.arvados.org/install/configure-azure-blob-storage.html
+          AccessViaHosts:
+            SAMPLE:
+              ReadOnly: false
+            "http://host1.example:25107": {}
+          ReadOnly: false
+          Replication: 1
+          StorageClasses:
+            default: true
+            SAMPLE: true
+          Driver: s3
+          DriverParameters:
+
+            # for s3 driver -- see
+            # https://doc.arvados.org/install/configure-s3-object-storage.html
+            IAMRole: aaaaa
+            AccessKey: aaaaa
+            SecretKey: aaaaa
+            Endpoint: ""
+            Region: us-east-1a
+            Bucket: aaaaa
+            LocationConstraint: false
+            IndexPageSize: 1000
+            ConnectTimeout: 1m
+            ReadTimeout: 10m
+            RaceWindow: 24h
+            UnsafeDelete: false
+
+            # for azure driver -- see
+            # https://doc.arvados.org/install/configure-azure-blob-storage.html
+            StorageAccountName: aaaaa
+            StorageAccountKey: aaaaa
+            StorageBaseURL: core.windows.net
+            ContainerName: aaaaa
+            RequestTimeout: 30s
+            ListBlobsRetryDelay: 10s
+            ListBlobsMaxAttempts: 10
+            MaxGetBytes: 0
+            WriteRaceInterval: 15s
+            WriteRacePollTime: 1s
+
+            # for local directory driver -- see
+            # https://doc.arvados.org/install/configure-fs-storage.html
+            Root: /var/lib/arvados/keep-data
+            Serialize: false
+
       Mail:
         MailchimpAPIKey: ""
         MailchimpListID: ""
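Putting the Volumes schema above to work, a single S3-backed volume served by two keepstore hosts might be declared like this; the volume ID, host URLs, credentials, region, and bucket name are all placeholders:

    Clusters:
      zzzzz:
        Volumes:
          zzzzz-nyw5e-000000000000000:
            AccessViaHosts:
              "http://keep0.zzzzz.example:25107": {}
              "http://keep1.zzzzz.example:25107": {}
            Replication: 2
            StorageClasses:
              default: true
            Driver: s3
            DriverParameters:
              AccessKey: "<aws access key>"
              SecretKey: "<aws secret key>"
              Region: us-east-1
              Bucket: example-keep-bucket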