X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d6c4fc82452b6c8e7fe492a0e2a163a19477f95a..fd38b59aac9b4453cf04fb7d6e1b8ad51549d6c3:/lib/config/generated_config.go diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 8a5b4610c2..d21bb2d284 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -31,8 +31,15 @@ Clusters: # listening, and reachable from other hosts in the cluster. SAMPLE: InternalURLs: - "http://example.host:12345": {} - SAMPLE: {} + "http://host1.example:12345": {} + "http://host2.example:12345": + # Rendezvous is normally empty/omitted. When changing the + # URL of a Keepstore service, Rendezvous should be set to + # the old URL (with trailing slash omitted) to preserve + # rendezvous ordering. + Rendezvous: "" + SAMPLE: + Rendezvous: "" ExternalURL: "-" RailsAPI: @@ -182,6 +189,15 @@ Clusters: # parameter higher than this value, this value is used instead. MaxItemsPerResponse: 1000 + # Maximum number of concurrent requests to accept in a single + # service process, or 0 for no limit. Currently supported only + # by keepstore. + MaxConcurrentRequests: 0 + + # Maximum number of 64MiB memory buffers per keepstore server + # process, or 0 for no limit. + MaxKeepBlobBuffers: 128 + # API methods to disable. Disabled methods are not listed in the # discovery document, and respond 404 to all requests. # Example: {"jobs.create":{}, "pipeline_instances.create": {}} @@ -309,43 +325,75 @@ Clusters: MaxRequestLogParamsSize: 2000 Collections: - # Allow clients to create collections by providing a manifest with - # unsigned data blob locators. 
IMPORTANT: This effectively disables - # access controls for data stored in Keep: a client who knows a hash - # can write a manifest that references the hash, pass it to - # collections.create (which will create a permission link), use - # collections.get to obtain a signature for that data locator, and - # use that signed locator to retrieve the data from Keep. Therefore, - # do not turn this on if your users expect to keep data private from - # one another! + + # Enable access controls for data stored in Keep. This should + # always be set to true on a production cluster. BlobSigning: true # BlobSigningKey is a string of alphanumeric characters used to # generate permission signatures for Keep locators. It must be - # identical to the permission key given to Keep. IMPORTANT: This is - # a site secret. It should be at least 50 characters. + # identical to the permission key given to Keep. IMPORTANT: This + # is a site secret. It should be at least 50 characters. # # Modifying BlobSigningKey will invalidate all existing # signatures, which can cause programs to fail (e.g., arv-put, - # arv-get, and Crunch jobs). To avoid errors, rotate keys only when - # no such processes are running. + # arv-get, and Crunch jobs). To avoid errors, rotate keys only + # when no such processes are running. BlobSigningKey: "" + # Enable garbage collection of unreferenced blobs in Keep. + BlobTrash: true + + # Time to leave unreferenced blobs in "trashed" state before + # deleting them, or 0 to skip the "trashed" state entirely and + # delete unreferenced blobs. + # + # If you use any Amazon S3 buckets as storage volumes, this + # must be at least 24h to avoid occasional data loss. + BlobTrashLifetime: 336h + + # How often to check for (and delete) trashed blocks whose + # BlobTrashLifetime has expired. + BlobTrashCheckInterval: 24h + + # Maximum number of concurrent "trash blob" and "delete trashed + # blob" operations conducted by a single keepstore process. 
Each + # of these can be set to 0 to disable the respective operation. + # + # If BlobTrashLifetime is zero, "trash" and "delete trash" + # happen at once, so only the lower of these two values is used. + BlobTrashConcurrency: 4 + BlobDeleteConcurrency: 4 + + # Maximum number of concurrent "create additional replica of + # existing blob" operations conducted by a single keepstore + # process. + BlobReplicateConcurrency: 4 + # Default replication level for collections. This is used when a # collection's replication_desired attribute is nil. DefaultReplication: 2 - # Lifetime (in seconds) of blob permission signatures generated by - # the API server. This determines how long a client can take (after - # retrieving a collection record) to retrieve the collection data - # from Keep. If the client needs more time than that (assuming the - # collection still has the same content and the relevant user/token - # still has permission) the client can retrieve the collection again - # to get fresh signatures. + # BlobSigningTTL determines the minimum lifetime of transient + # data, i.e., blocks that are not referenced by + # collections. Unreferenced blocks exist for two reasons: + # + # 1) A data block must be written to a disk/cloud backend device + # before a collection can be created/updated with a reference to + # it. + # + # 2) Deleting or updating a collection can remove the last + # remaining reference to a data block. + # + # If BlobSigningTTL is too short, long-running + # processes/containers will fail when they take too long (a) + # between writing blocks and writing collections that reference + # them, or (b) between reading collections and reading the + # referenced blocks. # - # This must be exactly equal to the -blob-signature-ttl flag used by - # keepstore servers. Otherwise, reading data blocks and saving - # collections will fail with HTTP 403 permission errors. 
+ # If BlobSigningTTL is too long, data will still be stored long + # after the referring collections are deleted, and you will + # needlessly fill up disks or waste money on cloud storage. # # Modifying BlobSigningTTL invalidates existing signatures; see # BlobSigningKey note above. @@ -353,6 +401,36 @@ Clusters: # The default is 2 weeks. BlobSigningTTL: 336h + # When running keep-balance, this is the destination filename for + # the list of lost block hashes if there are any, one per line. + # Updated automatically during each successful run. + BlobMissingReport: "" + + # keep-balance operates periodically, i.e.: do a + # scan/balance operation, sleep, repeat. + # + # BalancePeriod determines the interval between start times of + # successive scan/balance operations. If a scan/balance operation + # takes longer than BalancePeriod, the next one will follow it + # immediately. + # + # If SIGUSR1 is received during an idle period between operations, + # the next operation will start immediately. + BalancePeriod: 10m + + # Limits the number of collections retrieved by keep-balance per + # API transaction. If this is zero, page size is + # determined by the API server's own page size limits (see + # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead). + BalanceCollectionBatch: 0 + + # The size of keep-balance's internal queue of + # collections. Higher values use more memory and improve throughput + # by allowing keep-balance to fetch the next page of collections + # while the current page is still being processed. If this is zero + # or omitted, pages are processed serially. + BalanceCollectionBuffers: 1000 + # Default lifetime for ephemeral collections: 2 weeks. This must not # be less than BlobSigningTTL. DefaultTrashLifetime: 336h @@ -419,11 +497,20 @@ Clusters: MaxUUIDEntries: 1000 Login: - # These settings are provided by your OAuth2 provider (e.g., - # sso-provider). 
+ # These settings are provided by your OAuth2 provider (e.g., + # Google) used to perform upstream authentication. ProviderAppSecret: "" ProviderAppID: "" + # The cluster ID to delegate the user database to. When set, + # logins on this cluster will be redirected to the login cluster + # (login cluster must appear in RemoteHosts with Proxy: true) + LoginCluster: "" + + # How long a cached token belonging to a remote cluster will + # remain valid before it needs to be revalidated. + RemoteTokenRefresh: 5m + Git: # Path to git or gitolite-shell executable. Each authenticated # request will execute this program with the single argument "http-backend" @@ -747,6 +834,67 @@ Clusters: Price: 0.1 Preemptible: false + Volumes: + SAMPLE: + # AccessViaHosts specifies which keepstore processes can read + # and write data on the volume. + # + # For a local filesystem, AccessViaHosts has one entry, + # indicating which server the filesystem is located on. + # + # For a network-attached backend accessible by all keepstore + # servers, like a cloud storage bucket or an NFS mount, + # AccessViaHosts can be empty/omitted. 
+ # + # Further info/examples: + # https://doc.arvados.org/install/configure-fs-storage.html + # https://doc.arvados.org/install/configure-s3-object-storage.html + # https://doc.arvados.org/install/configure-azure-blob-storage.html + AccessViaHosts: + SAMPLE: + ReadOnly: false + "http://host1.example:25107": {} + ReadOnly: false + Replication: 1 + StorageClasses: + default: true + SAMPLE: true + Driver: s3 + DriverParameters: + + # for s3 driver -- see + # https://doc.arvados.org/install/configure-s3-object-storage.html + IAMRole: aaaaa + AccessKey: aaaaa + SecretKey: aaaaa + Endpoint: "" + Region: us-east-1a + Bucket: aaaaa + LocationConstraint: false + IndexPageSize: 1000 + ConnectTimeout: 1m + ReadTimeout: 10m + RaceWindow: 24h + UnsafeDelete: false + + # for azure driver -- see + # https://doc.arvados.org/install/configure-azure-blob-storage.html + StorageAccountName: aaaaa + StorageAccountKey: aaaaa + StorageBaseURL: core.windows.net + ContainerName: aaaaa + RequestTimeout: 30s + ListBlobsRetryDelay: 10s + ListBlobsMaxAttempts: 10 + MaxGetBytes: 0 + WriteRaceInterval: 15s + WriteRacePollTime: 1s + + # for local directory driver -- see + # https://doc.arvados.org/install/configure-fs-storage.html + Root: /var/lib/arvados/keep-data + Serialize: false + Mail: MailchimpAPIKey: "" MailchimpListID: ""