# listening, and reachable from other hosts in the cluster.
SAMPLE:
InternalURLs:
- "http://example.host:12345": {}
- SAMPLE: {}
+ "http://host1.example:12345": {}
+ "http://host2.example:12345":
+ # Rendezvous is normally empty/omitted. When changing the
+ # URL of a Keepstore service, Rendezvous should be set to
+ # the old URL (with trailing slash omitted) to preserve
+ # rendezvous ordering.
+ Rendezvous: ""
+ SAMPLE:
+ Rendezvous: ""
ExternalURL: "-"
RailsAPI:
# parameter higher than this value, this value is used instead.
MaxItemsPerResponse: 1000
+ # Maximum number of concurrent requests to accept in a single
+ # service process, or 0 for no limit. Currently supported only
+ # by keepstore.
+ MaxConcurrentRequests: 0
+
+ # Maximum number of 64MiB memory buffers per keepstore server
+ # process, or 0 for no limit.
+ MaxKeepBlobBuffers: 128
+
# API methods to disable. Disabled methods are not listed in the
# discovery document, and respond 404 to all requests.
# Example: {"jobs.create":{}, "pipeline_instances.create": {}}
WebsocketClientEventQueue: 64
WebsocketServerEventQueue: 4
+ # Timeout on requests to internal Keep services.
+ KeepServiceRequestTimeout: 15s
+
Users:
# Config parameters to automatically setup new users. If enabled,
# this users will be able to self-activate. Enable this if you want
# to run an open instance where anyone can create an account and use
# the system without requiring manual approval.
#
- # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
- # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+ # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+ # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
AutoSetupNewUsers: false
AutoSetupNewUsersWithVmUUID: ""
AutoSetupNewUsersWithRepository: false
syslog: {}
SAMPLE: {}
- # When new_users_are_active is set to true, new users will be active
+ # When NewUsersAreActive is set to true, new users will be active
# immediately. This skips the "self-activate" step which enforces
# user agreements. Should only be enabled for development.
NewUsersAreActive: false
# should be an address associated with a Google account.
AutoAdminUserWithEmail: ""
- # If auto_admin_first_user is set to true, the first user to log in when no
+ # If AutoAdminFirstUser is set to true, the first user to log in when no
# other admin users exist will automatically become an admin user.
AutoAdminFirstUser: false
NewUserNotificationRecipients: {}
NewInactiveUserNotificationRecipients: {}
- # Set anonymous_user_token to enable anonymous user access. You can get
+ # Set AnonymousUserToken to enable anonymous user access. You can get
# the token by running "bundle exec ./script/get_anonymous_user_token.rb"
# in the directory where your API server is running.
AnonymousUserToken: ""
+ # If a new user has an alternate email address (local@domain)
+ # with the domain given here, its local part becomes the new
+ # user's default username. Otherwise, the user's primary email
+ # address is used.
+ PreferDomainForUsername: ""
+
AuditLogs:
# Time to keep audit logs, in seconds. (An audit log is a row added
# to the "logs" table in the PostgreSQL database each time an
# Maximum number of log rows to delete in a single SQL transaction.
#
- # If max_audit_log_delete_batch is 0, log entries will never be
+ # If MaxDeleteBatch is 0, log entries will never be
# deleted by Arvados. Cleanup can be done by an external process
# without affecting any Arvados system processes, as long as very
# recent (<5 minutes old) logs are not deleted.
MaxRequestLogParamsSize: 2000
Collections:
- # Allow clients to create collections by providing a manifest with
- # unsigned data blob locators. IMPORTANT: This effectively disables
- # access controls for data stored in Keep: a client who knows a hash
- # can write a manifest that references the hash, pass it to
- # collections.create (which will create a permission link), use
- # collections.get to obtain a signature for that data locator, and
- # use that signed locator to retrieve the data from Keep. Therefore,
- # do not turn this on if your users expect to keep data private from
- # one another!
+
+ # Enable access controls for data stored in Keep. This should
+ # always be set to true on a production cluster.
BlobSigning: true
# BlobSigningKey is a string of alphanumeric characters used to
# generate permission signatures for Keep locators. It must be
- # identical to the permission key given to Keep. IMPORTANT: This is
- # a site secret. It should be at least 50 characters.
+ # identical to the permission key given to Keep. IMPORTANT: This
+ # is a site secret. It should be at least 50 characters.
#
- # Modifying blob_signing_key will invalidate all existing
+ # Modifying BlobSigningKey will invalidate all existing
# signatures, which can cause programs to fail (e.g., arv-put,
- # arv-get, and Crunch jobs). To avoid errors, rotate keys only when
- # no such processes are running.
+ # arv-get, and Crunch jobs). To avoid errors, rotate keys only
+ # when no such processes are running.
BlobSigningKey: ""
+ # Enable garbage collection of unreferenced blobs in Keep.
+ BlobTrash: true
+
+ # Time to leave unreferenced blobs in "trashed" state before
+ # deleting them, or 0 to skip the "trashed" state entirely and
+ # delete unreferenced blobs.
+ #
+ # If you use any Amazon S3 buckets as storage volumes, this
+ # must be at least 24h to avoid occasional data loss.
+ BlobTrashLifetime: 336h
+
+ # How often to check for (and delete) trashed blocks whose
+ # BlobTrashLifetime has expired.
+ BlobTrashCheckInterval: 24h
+
+ # Maximum number of concurrent "trash blob" and "delete trashed
+ # blob" operations conducted by a single keepstore process. Each
+ # of these can be set to 0 to disable the respective operation.
+ #
+ # If BlobTrashLifetime is zero, "trash" and "delete trash"
+ # happen at once, so only the lower of these two values is used.
+ BlobTrashConcurrency: 4
+ BlobDeleteConcurrency: 4
+
+ # Maximum number of concurrent "create additional replica of
+ # existing blob" operations conducted by a single keepstore
+ # process.
+ BlobReplicateConcurrency: 4
+
# Default replication level for collections. This is used when a
# collection's replication_desired attribute is nil.
DefaultReplication: 2
- # Lifetime (in seconds) of blob permission signatures generated by
- # the API server. This determines how long a client can take (after
- # retrieving a collection record) to retrieve the collection data
- # from Keep. If the client needs more time than that (assuming the
- # collection still has the same content and the relevant user/token
- # still has permission) the client can retrieve the collection again
- # to get fresh signatures.
+ # BlobSigningTTL determines the minimum lifetime of transient
+ # data, i.e., blocks that are not referenced by
+ # collections. Unreferenced blocks exist for two reasons:
+ #
+ # 1) A data block must be written to a disk/cloud backend device
+ # before a collection can be created/updated with a reference to
+ # it.
#
- # This must be exactly equal to the -blob-signature-ttl flag used by
- # keepstore servers. Otherwise, reading data blocks and saving
- # collections will fail with HTTP 403 permission errors.
+ # 2) Deleting or updating a collection can remove the last
+ # remaining reference to a data block.
#
- # Modifying blob_signature_ttl invalidates existing signatures; see
- # blob_signing_key note above.
+ # If BlobSigningTTL is too short, long-running
+ # processes/containers will fail when they take too long (a)
+ # between writing blocks and writing collections that reference
+ # them, or (b) between reading collections and reading the
+ # referenced blocks.
+ #
+ # If BlobSigningTTL is too long, data will still be stored long
+ # after the referring collections are deleted, and you will
+ # needlessly fill up disks or waste money on cloud storage.
+ #
+ # Modifying BlobSigningTTL invalidates existing signatures; see
+ # BlobSigningKey note above.
#
# The default is 2 weeks.
BlobSigningTTL: 336h
+ # When running keep-balance, this is the destination filename for
+ # the list of lost block hashes if there are any, one per line.
+ # Updated automatically during each successful run.
+ BlobMissingReport: ""
+
+ # keep-balance operates periodically, i.e.: do a
+ # scan/balance operation, sleep, repeat.
+ #
+ # BalancePeriod determines the interval between start times of
+ # successive scan/balance operations. If a scan/balance operation
+ # takes longer than BalancePeriod, the next one will follow it
+ # immediately.
+ #
+ # If SIGUSR1 is received during an idle period between operations,
+ # the next operation will start immediately.
+ BalancePeriod: 10m
+
+ # Limits the number of collections retrieved by keep-balance per
+ # API transaction. If this is zero, page size is
+ # determined by the API server's own page size limits (see
+ # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+ BalanceCollectionBatch: 0
+
+ # The size of keep-balance's internal queue of
+ # collections. Higher values use more memory and improve throughput
+ # by allowing keep-balance to fetch the next page of collections
+ # while the current page is still being processed. If this is zero
+ # or omitted, pages are processed serially.
+ BalanceCollectionBuffers: 1000
+
# Default lifetime for ephemeral collections: 2 weeks. This must not
- # be less than blob_signature_ttl.
+ # be less than BlobSigningTTL.
DefaultTrashLifetime: 336h
# Interval (seconds) between trash sweeps. During a trash sweep,
# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
- # is older than the amount of seconds defined on preserve_version_if_idle,
+ # is older than the amount of seconds defined on PreserveVersionIfIdle,
# a snapshot of the collection's previous state is created and linked to
# the current collection.
CollectionVersioning: false
# > 0s = auto-create a new version when older than the specified number of seconds.
PreserveVersionIfIdle: -1s
+ # If non-empty, allow project and collection names to contain
+ # the "/" character (slash/stroke/solidus), and replace "/" with
+ # the given string in the filesystem hierarchy presented by
+ # WebDAV. Example values are "%2f" and "{slash}". Names that
+ # contain the substitution string itself may result in confusing
+ # behavior, so a value like "_" is not recommended.
+ #
+ # If the default empty value is used, the server will reject
+ # requests to create or rename a collection when the new name
+ # contains "/".
+ #
+ # If the value "/" is used, project and collection names
+ # containing "/" will be allowed, but they will not be
+ # accessible via WebDAV.
+ #
+ # Use of this feature is not recommended, if it can be avoided.
+ ForwardSlashNameSubstitution: ""
+
# Managed collection properties. At creation time, if the client didn't
# provide the listed keys, they will be automatically populated following
# one of the following behaviors:
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
+ # Cache parameters for WebDAV content serving:
+ # * TTL: Maximum time to cache manifests and permission checks.
+ # * UUIDTTL: Maximum time to cache collection state.
+ # * MaxBlockEntries: Maximum number of block cache entries.
+ # * MaxCollectionEntries: Maximum number of collection cache entries.
+ # * MaxCollectionBytes: Approximate memory limit for collection cache.
+ # * MaxPermissionEntries: Maximum number of permission cache entries.
+ # * MaxUUIDEntries: Maximum number of UUID cache entries.
+ WebDAVCache:
+ TTL: 300s
+ UUIDTTL: 5s
+ MaxBlockEntries: 4
+ MaxCollectionEntries: 1000
+ MaxCollectionBytes: 100000000
+ MaxPermissionEntries: 1000
+ MaxUUIDEntries: 1000
+
Login:
- # These settings are provided by your OAuth2 provider (e.g.,
- # sso-provider).
- ProviderAppSecret: ""
+ # These settings are provided by your OAuth2 provider (e.g.,
+ # Google) used to perform upstream authentication.
ProviderAppID: ""
+ ProviderAppSecret: ""
+
+ # (Experimental) Authenticate with Google, bypassing the
+ # SSO-provider gateway service. Use the Google Cloud console to
+ # enable the People API (APIs and Services > Enable APIs and
+ # services > Google People API > Enable), generate a Client ID
+ # and secret (APIs and Services > Credentials > Create
+ # credentials > OAuth client ID > Web application) and add your
+ # controller's /login URL (e.g.,
+ # "https://zzzzz.example.com/login") as an authorized redirect
+ # URL.
+ #
+ # Incompatible with ForceLegacyAPI14. ProviderAppID must be
+ # blank.
+ GoogleClientID: ""
+ GoogleClientSecret: ""
+
+ # Allow users to log in to existing accounts using any verified
+ # email address listed by their Google account. If true, the
+ # Google People API must be enabled in order for Google login to
+ # work. If false, only the primary email address will be used.
+ GoogleAlternateEmailAddresses: true
+
+ # The cluster ID to delegate the user database to. When set,
+ # logins on this cluster will be redirected to the login cluster
+ # (login cluster must appear in RemoteClusters with Proxy: true)
+ LoginCluster: ""
+
+ # How long a cached token belonging to a remote cluster will
+ # remain valid before it needs to be revalidated.
+ RemoteTokenRefresh: 5m
Git:
+ # Path to git or gitolite-shell executable. Each authenticated
+ # request will execute this program with the single argument "http-backend"
+ GitCommand: /usr/bin/git
+
+ # Path to Gitolite's home directory. If a non-empty path is given,
+ # the CGI environment will be set up to support the use of
+ # gitolite-shell as a GitCommand: for example, if GitoliteHome is
+ # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
+ # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
+ GitoliteHome: ""
+
# Git repositories must be readable by api server, or you won't be
# able to submit crunch jobs. To pass the test suites, put a clone
# of the arvados tree in {git_repositories_dir}/arvados.git or
# has been reached or crunch_log_seconds_between_events has elapsed since
# the last flush.
LogBytesPerEvent: 4096
- LogSecondsBetweenEvents: 1
+ LogSecondsBetweenEvents: 5s
# The sample period for throttling logs.
LogThrottlePeriod: 60s
SLURM:
PrioritySpread: 0
SbatchArgumentsList: []
+ SbatchEnvironmentVariables:
+ SAMPLE: ""
Managed:
# Path to dns server configuration directory
# (e.g. /etc/unbound.d/conf.d). If false, do not write any config
AssignNodeHostname: "compute%<slot_number>d"
JobsAPI:
- # Enable the legacy Jobs API. This value must be a string.
+ # Enable the legacy 'jobs' API (crunch v1). This value must be a string.
+ #
+ # Note: this only enables read-only access; creating new
+ # legacy jobs and pipelines is not supported.
+ #
# 'auto' -- (default) enable the Jobs API only if it has been used before
# (i.e., there are job records in the database)
# 'true' -- enable the Jobs API despite lack of existing records.
# {git_repositories_dir}/arvados/.git
GitInternalDir: /var/lib/arvados/internal.git
- # Docker image to be used when none found in runtime_constraints of a job
- DefaultDockerImage: ""
-
- # none or slurm_immediate
- CrunchJobWrapper: none
-
- # username, or false = do not set uid when running jobs.
- CrunchJobUser: crunch
-
- # The web service must be able to create/write this file, and
- # crunch-job must be able to stat() it.
- CrunchRefreshTrigger: /tmp/crunch_refresh_trigger
-
- # Control job reuse behavior when two completed jobs match the
- # search criteria and have different outputs.
- #
- # If true, in case of a conflict, reuse the earliest job (this is
- # similar to container reuse behavior).
- #
- # If false, in case of a conflict, do not reuse any completed job,
- # but do reuse an already-running job if available (this is the
- # original job reuse behavior, and is still the default).
- ReuseJobIfOutputsDiffer: false
-
CloudVMs:
# Enable the cloud scheduler (experimental).
Enable: false
# Worker VM image ID.
ImageID: ""
+ # An executable file (located on the dispatcher host) to be
+ # copied to cloud instances at runtime and used as the
+ # container runner/supervisor. The default value is the
+ # dispatcher program itself.
+ #
+ # Use the empty string to disable this step: nothing will be
+ # copied, and cloud instances are assumed to have a suitable
+ # version of crunch-run installed.
+ DeployRunnerBinary: "/proc/self/exe"
+
# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
# InstanceType, IdleBehavior, and InstanceSecret -- will also
Price: 0.1
Preemptible: false
+ Volumes:
+ SAMPLE:
+ # AccessViaHosts specifies which keepstore processes can read
+ # and write data on the volume.
+ #
+ # For a local filesystem, AccessViaHosts has one entry,
+ # indicating which server the filesystem is located on.
+ #
+ # For a network-attached backend accessible by all keepstore
+ # servers, like a cloud storage bucket or an NFS mount,
+ # AccessViaHosts can be empty/omitted.
+ #
+ # Further info/examples:
+ # https://doc.arvados.org/install/configure-fs-storage.html
+ # https://doc.arvados.org/install/configure-s3-object-storage.html
+ # https://doc.arvados.org/install/configure-azure-blob-storage.html
+ AccessViaHosts:
+ SAMPLE:
+ ReadOnly: false
+ "http://host1.example:25107": {}
+ ReadOnly: false
+ Replication: 1
+ StorageClasses:
+ default: true
+ SAMPLE: true
+ Driver: s3
+ DriverParameters:
+ # for s3 driver -- see
+ # https://doc.arvados.org/install/configure-s3-object-storage.html
+ IAMRole: aaaaa
+ AccessKey: aaaaa
+ SecretKey: aaaaa
+ Endpoint: ""
+ Region: us-east-1a
+ Bucket: aaaaa
+ LocationConstraint: false
+ IndexPageSize: 1000
+ ConnectTimeout: 1m
+ ReadTimeout: 10m
+ RaceWindow: 24h
+
+ # For S3 driver, potentially unsafe tuning parameter,
+ # intentionally excluded from main documentation.
+ #
+ # Enable deletion (garbage collection) even when the
+ # configured BlobTrashLifetime is zero. WARNING: eventual
+ # consistency may result in race conditions that can cause
+ # data loss. Do not enable this unless you understand and
+ # accept the risk.
+ UnsafeDelete: false
+
+ # for azure driver -- see
+ # https://doc.arvados.org/install/configure-azure-blob-storage.html
+ StorageAccountName: aaaaa
+ StorageAccountKey: aaaaa
+ StorageBaseURL: core.windows.net
+ ContainerName: aaaaa
+ RequestTimeout: 30s
+ ListBlobsRetryDelay: 10s
+ ListBlobsMaxAttempts: 10
+ MaxGetBytes: 0
+ WriteRaceInterval: 15s
+ WriteRacePollTime: 1s
+
+ # for local directory driver -- see
+ # https://doc.arvados.org/install/configure-fs-storage.html
+ Root: /var/lib/arvados/keep-data
+
+ # For local directory driver, potentially confusing tuning
+ # parameter, intentionally excluded from main documentation.
+ #
+ # When true, read and write operations (for whole 64MiB
+ # blocks) on an individual volume will be queued and issued
+ # serially. When false, read and write operations will be
+ # issued concurrently.
+ #
+ # May possibly improve throughput if you have physical spinning disks
+ # and experience contention when there are multiple requests
+ # to the same volume.
+ #
+ # Otherwise, when using SSDs, RAID, or a shared network filesystem, you
+ # should leave this alone.
+ Serialize: false
+
Mail:
MailchimpAPIKey: ""
MailchimpListID: ""
VocabularyURL: ""
FileViewersConfigURL: ""
- # Use experimental controller code (see https://dev.arvados.org/issues/14287)
- EnableBetaController14287: false
+ # Workbench welcome screen, this is HTML text that will be
+ # incorporated directly onto the page.
+ WelcomePageHTML: |
+ <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+ <h2>Please log in.</h2>
+
+ <p>The "Log in" button below will show you a sign-in
+ page. After you log in, you will be redirected back to
+ Arvados Workbench.</p>
+
+ <p>If you have never used Arvados Workbench before, logging in
+ for the first time will automatically create a new
+ account.</p>
+
+ <i>Arvados Workbench uses your name and email address only for
+ identification, and does not retrieve any other personal
+ information.</i>
+
+ InactivePageHTML: |
+ <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+ <h3>Hi! You're logged in, but...</h3>
+ <p>Your account is inactive.</p>
+ <p>An administrator must activate your account before you can get
+ any further.</p>
+
+ # Bypass new (Arvados 1.5) API implementations, and hand off
+ # requests directly to Rails instead. This can provide a temporary
+ # workaround for clients that are incompatible with the new API
+ # implementation. Note that it also disables some new federation
+ # features and will be removed in a future release.
+ ForceLegacyAPI14: false
`)