git.arvados.org - arvados.git/blob - lib/config/config.default.yml

2 #

3 # SPDX-License-Identifier: AGPL-3.0

4

5 # Do not use this file for site configuration. Create

6 # /etc/arvados/config.yml instead.

7 #

8 # The order of precedence (highest to lowest):

9 # 1. Legacy component-specific config files (deprecated)

10 # 2. /etc/arvados/config.yml

11 # 3. config.default.yml

12

13 Clusters:

14 xxxxx:

15 # Token used internally by Arvados components to authenticate to

16 # one another. Use a string of at least 50 random alphanumerics.

17 SystemRootToken: ""

18

19 # Token to be included in all healthcheck requests. Disabled by default.

20 # Server expects request header of the format "Authorization: Bearer xxx"

21 ManagementToken: ""

22

23 Services:

24

25 # Each of the service sections below specifies InternalURLs

26 # (each with optional ListenURL) and ExternalURL.

27 #

28 # InternalURLs specify how other Arvados service processes will

29 # connect to the service. Typically these use internal hostnames

30 # and high port numbers. Example:

31 #

32 # InternalURLs:

33 # "http://host1.internal.example:12345": {}

34 # "http://host2.internal.example:12345": {}

35 #

36 # ListenURL specifies the address and port the service process's

37 # HTTP server should listen on, if different from the

38 # InternalURL itself. Example, using an intermediate TLS proxy:

39 #

40 # InternalURLs:

41 # "https://host1.internal.example":

42 # ListenURL: "http://10.0.0.7:12345"

43 #

44 # When there are multiple InternalURLs configured, the service

45 # process will try listening on each InternalURL (using

46 # ListenURL if provided) until one works. If you use a ListenURL

47 # like "0.0.0.0" which can be bound on any machine, use an

48 # environment variable

49 # ARVADOS_SERVICE_INTERNAL_URL=http://host1.internal.example to

50 # control which entry to use.

51 #

52 # ExternalURL specifies how applications/clients will connect to

53 # the service, regardless of whether they are inside or outside

54 # the cluster. Example:

55 #

56 # ExternalURL: "https://keep.zzzzz.example.com/"

57 #

58 # To avoid routing internal traffic through external networks,

59 # use split-horizon DNS for ExternalURL host names: inside the

60 # cluster's private network "host.zzzzz.example.com" resolves to

61 # the host's private IP address, while outside the cluster

62 # "host.zzzzz.example.com" resolves to the host's public IP

63 # address (or its external gateway or load balancer).

64

65 RailsAPI:

66 InternalURLs: {SAMPLE: {ListenURL: ""}}

67 ExternalURL: ""

68 Controller:

69 InternalURLs: {SAMPLE: {ListenURL: ""}}

70 ExternalURL: ""

71 Websocket:

72 InternalURLs: {SAMPLE: {ListenURL: ""}}

73 ExternalURL: ""

74 Keepbalance:

75 InternalURLs: {SAMPLE: {ListenURL: ""}}

76 ExternalURL: ""

77 GitHTTP:

78 InternalURLs: {SAMPLE: {ListenURL: ""}}

79 ExternalURL: ""

80 GitSSH:

81 InternalURLs: {SAMPLE: {ListenURL: ""}}

82 ExternalURL: ""

83 DispatchCloud:

84 InternalURLs: {SAMPLE: {ListenURL: ""}}

85 ExternalURL: ""

86 DispatchLSF:

87 InternalURLs: {SAMPLE: {ListenURL: ""}}

88 ExternalURL: ""

89 DispatchSLURM:

90 InternalURLs: {SAMPLE: {ListenURL: ""}}

91 ExternalURL: ""

92 Keepproxy:

93 InternalURLs: {SAMPLE: {ListenURL: ""}}

94 ExternalURL: ""

95 WebDAV:

96 InternalURLs: {SAMPLE: {ListenURL: ""}}

97 # Base URL for Workbench inline preview. If blank, use

98 # WebDAVDownload instead, and disable inline preview.

99 # If both are empty, downloading collections from workbench

100 # will be impossible.

101 #

102 # It is important to properly configure the download service

103 # to mitigate cross-site-scripting (XSS) attacks. An HTML page

104 # can be stored in a collection. If an attacker causes a victim

105 # to visit that page through Workbench, it will be rendered by

106 # the browser. If all collections are served at the same

107 # domain, the browser will consider collections as coming from

108 # the same origin and having access to the same browsing data,

109 # enabling malicious Javascript on that page to access Arvados

110 # on behalf of the victim.

111 #

112 # This is mitigated by having separate domains for each

113 # collection, or limiting preview to circumstances where the

114 # collection is not accessed with the user's regular

115 # full-access token.

116 #

117 # Serve preview links using uuid or pdh in subdomain

118 # (requires wildcard DNS and TLS certificate)

119 # https://*.collections.uuid_prefix.arvadosapi.com

120 #

121 # Serve preview links using uuid or pdh in main domain

122 # (requires wildcard DNS and TLS certificate)

123 # https://*--collections.uuid_prefix.arvadosapi.com

124 #

125 # Serve preview links by setting uuid or pdh in the path.

126 # This configuration only allows previews of public data or

127 # collection-sharing links, because these use the anonymous

128 # user token or the token is already embedded in the URL.

129 # Other data must be handled as downloads via WebDAVDownload:

130 # https://collections.uuid_prefix.arvadosapi.com

131 #

132 ExternalURL: ""

133

134 WebDAVDownload:

135 InternalURLs: {SAMPLE: {ListenURL: ""}}

136 # Base URL for download links. If blank, serve links to WebDAV

137 # with disposition=attachment query param. Unlike preview links,

138 # browsers do not render attachments, so there is no risk of XSS.

139 #

140 # If WebDAVDownload is blank, and WebDAV uses a

141 # single-origin form, then Workbench will show an error page

142 #

143 # Serve download links by setting uuid or pdh in the path:

144 # https://download.uuid_prefix.arvadosapi.com

145 #

146 ExternalURL: ""

147

148 Keepstore:

149 InternalURLs:

150 SAMPLE:

151 ListenURL: ""

152 # Rendezvous is normally empty/omitted. When changing the

153 # URL of a Keepstore service, Rendezvous should be set to

154 # the old URL (with trailing slash omitted) to preserve

155 # rendezvous ordering.

156 Rendezvous: ""

157 ExternalURL: ""

158 Composer:

159 InternalURLs: {SAMPLE: {ListenURL: ""}}

160 ExternalURL: ""

161 WebShell:

162 InternalURLs: {SAMPLE: {ListenURL: ""}}

163 # ShellInABox service endpoint URL for a given VM. If empty, do not

164 # offer web shell logins.

165 #

166 # E.g., using a path-based proxy server to forward connections to shell hosts:

167 # https://webshell.uuid_prefix.arvadosapi.com

168 #

169 # E.g., using a name-based proxy server to forward connections to shell hosts:

170 # https://*.webshell.uuid_prefix.arvadosapi.com

171 ExternalURL: ""

172 Workbench1:

173 InternalURLs: {SAMPLE: {ListenURL: ""}}

174 ExternalURL: ""

175 Workbench2:

176 InternalURLs: {SAMPLE: {ListenURL: ""}}

177 ExternalURL: ""

178 Health:

179 InternalURLs: {SAMPLE: {ListenURL: ""}}

180 ExternalURL: ""

181

182 PostgreSQL:

183 # max concurrent connections per arvados server daemon

184 ConnectionPool: 32

185 Connection:

186 # All parameters here are passed to the PG client library in a connection string;

187 # see https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS

188 host: ""

189 port: ""

190 user: ""

191 password: ""

192 dbname: ""

193 SAMPLE: ""

194 API:

195 # Limits for how long a client token created by regular users can be valid,

196 # and also is used as a default expiration policy when no expiration date is

197 # specified.

198 # Default value zero means token expirations don't get clamped and no

199 # default expiration is set.

200 MaxTokenLifetime: 0s

201

202 # Maximum size (in bytes) allowed for a single API request. This

203 # limit is published in the discovery document for use by clients.

204 # Note: You must separately configure the upstream web server or

205 # proxy to actually enforce the desired maximum request size on the

206 # server side.

207 MaxRequestSize: 134217728

208

209 # Limit the number of bytes read from the database during an index

210 # request (by retrieving and returning fewer rows than would

211 # normally be returned in a single response).

212 # Note 1: This setting never reduces the number of returned rows to

213 # zero, no matter how big the first data row is.

214 # Note 2: Currently, this is only checked against a specific set of

215 # columns that tend to get large (collections.manifest_text,

216 # containers.mounts, workflows.definition). Other fields (e.g.,

217 # "properties" hashes) are not counted against this limit.

218 MaxIndexDatabaseRead: 134217728

219

220 # Maximum number of items to return when responding to APIs that

221 # can return partial result sets using limit and offset parameters

222 # (e.g., *.index, groups.contents). If a request specifies a "limit"

223 # parameter higher than this value, this value is used instead.

224 MaxItemsPerResponse: 1000

225

226 # Maximum number of concurrent requests to accept in a single

227 # service process, or 0 for no limit.

228 MaxConcurrentRequests: 64

229

230 # Fraction of MaxConcurrentRequests that can be "log create"

231 # messages at any given time. This is to prevent logging

232 # updates from crowding out more important requests.

233 LogCreateRequestFraction: 0.50

234

235 # Maximum number of 64MiB memory buffers per Keepstore server process, or

236 # 0 for no limit. When this limit is reached, up to

237 # (MaxConcurrentRequests - MaxKeepBlobBuffers) HTTP requests requiring

238 # buffers (like GET and PUT) will wait for buffer space to be released.

239 # Any HTTP requests beyond MaxConcurrentRequests will receive an

240 # immediate 503 response.

241 #

242 # MaxKeepBlobBuffers should be set such that (MaxKeepBlobBuffers * 64MiB

243 # * 1.1) fits comfortably in memory. On a host dedicated to running

244 # Keepstore, divide total memory by 88MiB to suggest a suitable value.

245 # For example, if grep MemTotal /proc/meminfo reports MemTotal: 7125440

246 # kB, compute 7125440 / (88 * 1024)=79 and set MaxKeepBlobBuffers: 79

247 MaxKeepBlobBuffers: 128

248

249 # API methods to disable. Disabled methods are not listed in the

250 # discovery document, and respond 404 to all requests.

251 # Example: {"jobs.create":{}, "pipeline_instances.create": {}}

252 DisabledAPIs: {}

253

254 # Interval (seconds) between asynchronous permission view updates. Any

255 # permission-updating API called with the 'async' parameter schedules an

256 # update on the permission view in the future, if not already scheduled.

257 AsyncPermissionsUpdateInterval: 20s

258

259 # Maximum number of concurrent outgoing requests to make while

260 # serving a single incoming multi-cluster (federated) request.

261 MaxRequestAmplification: 4

262

263 # Maximum wall clock time to spend handling an incoming request.

264 RequestTimeout: 5m

265

266 # Websocket will send a periodic empty event after 'SendTimeout'

267 # if there is no other activity to maintain the connection /

268 # detect dropped connections.

269 SendTimeout: 60s

270

271 WebsocketClientEventQueue: 64

272 WebsocketServerEventQueue: 4

273

274 # Timeout on requests to internal Keep services.

275 KeepServiceRequestTimeout: 15s

276

277 # Vocabulary file path, local to the node running the controller.

278 # This JSON file should contain the description of what's allowed

279 # as object's metadata. Its format is described at:

280 # https://doc.arvados.org/admin/metadata-vocabulary.html

281 VocabularyPath: ""

282

283 # If true, a project must have a non-empty description field in

284 # order to be frozen.

285 FreezeProjectRequiresDescription: false

286

287 # Project properties that must have non-empty values in order to

288 # freeze a project. Example: "property_name": {}

289 FreezeProjectRequiresProperties:

290 SAMPLE: {}

291

292 # If true, only an admin user can un-freeze a project. If false,

293 # any user with "manage" permission can un-freeze.

294 UnfreezeProjectRequiresAdmin: false

295

296 # (Experimental) Use row-level locking on update API calls.

297 LockBeforeUpdate: false

298

299 Users:

300 # Config parameters to automatically set up new users. If enabled,

301 # new users will be able to self-activate. Enable this if you want

302 # to run an open instance where anyone can create an account and use

303 # the system without requiring manual approval.

304 #

305 # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.

306 # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.

307 AutoSetupNewUsers: false

308 AutoSetupNewUsersWithVmUUID: ""

309 AutoSetupNewUsersWithRepository: false

310 AutoSetupUsernameBlacklist:

311 arvados: {}

312 git: {}

313 gitolite: {}

314 gitolite-admin: {}

315 root: {}

316 syslog: {}

317 SAMPLE: {}

318

319 # When NewUsersAreActive is set to true, new users will be active

320 # immediately. This skips the "self-activate" step which enforces

321 # user agreements. Should only be enabled for development.

322 NewUsersAreActive: false

323

324 # Newly activated users (whether set up by an admin or via

325 # AutoSetupNewUsers) immediately become visible to other active

326 # users.

327 #

328 # On a multi-tenant cluster, where the intent is for users to be

329 # invisible to one another unless they have been added to the

330 # same group(s) via Workbench admin interface, change this to

331 # false.

332 ActivatedUsersAreVisibleToOthers: true

333

334 # The e-mail address of the user you would like to become marked as an admin

335 # user on their first login.

336 AutoAdminUserWithEmail: ""

337

338 # If AutoAdminFirstUser is set to true, the first user to log in when no

339 # other admin users exist will automatically become an admin user.

340 AutoAdminFirstUser: false

341

342 # Email address to notify whenever a user creates a profile for the

343 # first time

344 UserProfileNotificationAddress: ""

345 AdminNotifierEmailFrom: arvados@example.com

346 EmailSubjectPrefix: "[ARVADOS] "

347 UserNotifierEmailFrom: arvados@example.com

348 UserNotifierEmailBcc: {}

349 NewUserNotificationRecipients: {}

350 NewInactiveUserNotificationRecipients: {}

351

352 # Set AnonymousUserToken to enable anonymous user access. Populate this

353 # field with a random string at least 50 characters long.

354 AnonymousUserToken: ""

355

356 # If a new user has an alternate email address (local@domain)

357 # with the domain given here, its local part becomes the new

358 # user's default username. Otherwise, the user's primary email

359 # address is used.

360 PreferDomainForUsername: ""

361

362 UserSetupMailText: |

363 <% if not @user.full_name.empty? -%>

364 <%= @user.full_name %>,

365 <% else -%>

366 Hi there,

367 <% end -%>

368

369 Your Arvados account has been set up. You can log in at

370

371 <%= Rails.configuration.Services.Workbench1.ExternalURL %>

372

373 Thanks,

374 Your Arvados administrator.

375

376 # If RoleGroupsVisibleToAll is true, all role groups are visible

377 # to all active users.

378 #

379 # If false, users must be granted permission to role groups in

380 # order to see them. This is more appropriate for a multi-tenant

381 # cluster.

382 RoleGroupsVisibleToAll: true

383

384 # If CanCreateRoleGroups is true, regular (non-admin) users can

385 # create new role groups.

386 #

387 # If false, only admins can create new role groups.

388 CanCreateRoleGroups: true

389

390 # During each period, a log entry with event_type="activity"

391 # will be recorded for each user who is active during that

392 # period. The object_uuid attribute will indicate the user's

393 # UUID.

394 #

395 # Multiple log entries for the same user may be generated during

396 # a period if there are multiple controller processes or a

397 # controller process is restarted.

398 #

399 # Use 0 to disable activity logging.

400 ActivityLoggingPeriod: 24h

401

402 AuditLogs:

403 # Time to keep audit logs. (An audit log is a row added

404 # to the "logs" table in the PostgreSQL database each time an

405 # Arvados object is created, modified, or deleted.)

406 #

407 # Currently, websocket event notifications rely on audit logs, so

408 # this should not be set lower than 5 minutes.

409 MaxAge: 336h

410

411 # Maximum number of log rows to delete in a single SQL transaction.

412 #

413 # If MaxDeleteBatch is 0, log entries will never be

414 # deleted by Arvados. Cleanup can be done by an external process

415 # without affecting any Arvados system processes, as long as very

416 # recent (<5 minutes old) logs are not deleted.

417 #

418 # 100000 is a reasonable batch size for most sites.

419 MaxDeleteBatch: 0

420

421 # Attributes to suppress in events and audit logs. Notably,

422 # specifying {"manifest_text": {}} here typically makes the database

423 # smaller and faster.

424 #

425 # Warning: Using any non-empty value here can have undesirable side

426 # effects for any client or component that relies on event logs.

427 # Use at your own risk.

428 UnloggedAttributes: {}

429

430 SystemLogs:

431

432 # Logging threshold: panic, fatal, error, warn, info, debug, or

433 # trace

434 LogLevel: info

435

436 # Logging format: json or text

437 Format: json

438

439 # Maximum characters of (JSON-encoded) query parameters to include

440 # in each request log entry. When params exceed this size, they will

441 # be JSON-encoded, truncated to this size, and logged as

442 # params_truncated.

443 MaxRequestLogParamsSize: 2000

444

445 Collections:

446

447 # Enable access controls for data stored in Keep. This should

448 # always be set to true on a production cluster.

449 BlobSigning: true

450

451 # BlobSigningKey is a string of alphanumeric characters used to

452 # generate permission signatures for Keep locators. It must be

453 # identical to the permission key given to Keep. IMPORTANT: This

454 # is a site secret. It should be at least 50 characters.

455 #

456 # Modifying BlobSigningKey will invalidate all existing

457 # signatures, which can cause programs to fail (e.g., arv-put,

458 # arv-get, and Crunch jobs). To avoid errors, rotate keys only

459 # when no such processes are running.

460 BlobSigningKey: ""

461

462 # Enable garbage collection of unreferenced blobs in Keep.

463 BlobTrash: true

464

465 # Time to leave unreferenced blobs in "trashed" state before

466 # deleting them, or 0 to skip the "trashed" state entirely and

467 # delete unreferenced blobs.

468 #

469 # If you use any Amazon S3 buckets as storage volumes, this

470 # must be at least 24h to avoid occasional data loss.

471 BlobTrashLifetime: 336h

472

473 # How often to check for (and delete) trashed blocks whose

474 # BlobTrashLifetime has expired.

475 BlobTrashCheckInterval: 24h

476

477 # Maximum number of concurrent "trash blob" and "delete trashed

478 # blob" operations conducted by a single keepstore process. Each

479 # of these can be set to 0 to disable the respective operation.

480 #

481 # If BlobTrashLifetime is zero, "trash" and "delete trash"

482 # happen at once, so only the lower of these two values is used.

483 BlobTrashConcurrency: 4

484 BlobDeleteConcurrency: 4

485

486 # Maximum number of concurrent "create additional replica of

487 # existing blob" operations conducted by a single keepstore

488 # process.

489 BlobReplicateConcurrency: 4

490

491 # Default replication level for collections. This is used when a

492 # collection's replication_desired attribute is nil.

493 DefaultReplication: 2

494

495 # BlobSigningTTL determines the minimum lifetime of transient

496 # data, i.e., blocks that are not referenced by

497 # collections. Unreferenced blocks exist for two reasons:

498 #

499 # 1) A data block must be written to a disk/cloud backend device

500 # before a collection can be created/updated with a reference to

501 # it.

502 #

503 # 2) Deleting or updating a collection can remove the last

504 # remaining reference to a data block.

505 #

506 # If BlobSigningTTL is too short, long-running

507 # processes/containers will fail when they take too long (a)

508 # between writing blocks and writing collections that reference

509 # them, or (b) between reading collections and reading the

510 # referenced blocks.

511 #

512 # If BlobSigningTTL is too long, data will still be stored long

513 # after the referring collections are deleted, and you will

514 # needlessly fill up disks or waste money on cloud storage.

515 #

516 # Modifying BlobSigningTTL invalidates existing signatures; see

517 # BlobSigningKey note above.

518 #

519 # The default is 2 weeks.

520 BlobSigningTTL: 336h

521

522 # When running keep-balance, this is the destination filename for

523 # the list of lost block hashes if there are any, one per line.

524 # Updated automatically during each successful run.

525 BlobMissingReport: ""

526

527 # keep-balance operates periodically, i.e.: do a

528 # scan/balance operation, sleep, repeat.

529 #

530 # BalancePeriod determines the interval between start times of

531 # successive scan/balance operations. If a scan/balance operation

532 # takes longer than BalancePeriod, the next one will follow it

533 # immediately.

534 #

535 # If SIGUSR1 is received during an idle period between operations,

536 # the next operation will start immediately.

537 BalancePeriod: 6h

538

539 # Limits the number of collections retrieved by keep-balance per

540 # API transaction. If this is zero, page size is

541 # determined by the API server's own page size limits (see

542 # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).

543 BalanceCollectionBatch: 0

544

545 # The size of keep-balance's internal queue of

546 # collections. Higher values may improve throughput by allowing

547 # keep-balance to fetch collections from the database while the

548 # current collections are still being processed, at the expense of

549 # using more memory. If this is zero or omitted, pages are

550 # processed serially.

551 BalanceCollectionBuffers: 4

552

553 # Maximum time for a rebalancing run. This ensures keep-balance

554 # eventually gives up and retries if, for example, a network

555 # error causes a hung connection that is never closed by the

556 # OS. It should be long enough that it doesn't interrupt a

557 # long-running balancing operation.

558 BalanceTimeout: 6h

559

560 # Maximum number of replication_confirmed /

561 # storage_classes_confirmed updates to write to the database

562 # after a rebalancing run. When many updates are needed, this

563 # spreads them over a few runs rather than applying them all at

564 # once.

565 BalanceUpdateLimit: 100000

566

567 # Default lifetime for ephemeral collections: 2 weeks. This must not

568 # be less than BlobSigningTTL.

569 DefaultTrashLifetime: 336h

570

571 # Interval (seconds) between trash sweeps. During a trash sweep,

572 # collections are marked as trash if their trash_at time has

573 # arrived, and deleted if their delete_at time has arrived.

574 TrashSweepInterval: 60s

575

576 # If true, enable collection versioning.

577 # When a collection's preserve_version field is true or the current version

578 # is older than the amount of seconds defined on PreserveVersionIfIdle,

579 # a snapshot of the collection's previous state is created and linked to

580 # the current collection.

581 CollectionVersioning: true

582

583 # 0s = auto-create a new version on every update.

584 # -1s = never auto-create new versions.

585 # > 0s = auto-create a new version when older than the specified number of seconds.

586 PreserveVersionIfIdle: 10s

587

588 # If non-empty, allow project and collection names to contain

589 # the "/" character (slash/stroke/solidus), and replace "/" with

590 # the given string in the filesystem hierarchy presented by

591 # WebDAV. Example values are "%2f" and "{slash}". Names that

592 # contain the substitution string itself may result in confusing

593 # behavior, so a value like "_" is not recommended.

594 #

595 # If the default empty value is used, the server will reject

596 # requests to create or rename a collection when the new name

597 # contains "/".

598 #

599 # If the value "/" is used, project and collection names

600 # containing "/" will be allowed, but they will not be

601 # accessible via WebDAV.

602 #

603 # Use of this feature is not recommended, if it can be avoided.

604 ForwardSlashNameSubstitution: ""

605

606 # Include "folder objects" in S3 ListObjects responses.

607 S3FolderObjects: true

608

609 # Managed collection properties. At creation time, if the client didn't

610 # provide the listed keys, they will be automatically populated following

611 # one of the following behaviors:

612 #

613 # * UUID of the user who owns the containing project.

614 # responsible_person_uuid: {Function: original_owner, Protected: true}

615 #

616 # * Default concrete value.

617 # foo_bar: {Value: baz, Protected: false}

618 #

619 # If Protected is true, only an admin user can modify its value.

620 ManagedProperties:

621 SAMPLE: {Function: original_owner, Protected: true}

622

623 # In "trust all content" mode, Workbench will redirect download

624 # requests to WebDAV preview link, even in the cases when

625 # WebDAV would have to expose XSS vulnerabilities in order to

626 # handle the redirect (see discussion on Services.WebDAV).

627 #

628 # This setting has no effect in the recommended configuration, where the

629 # WebDAV service is configured to have a separate domain for every

630 # collection and XSS protection is provided by browsers' same-origin

631 # policy.

632 #

633 # The default setting (false) is appropriate for a multi-user site.

634 TrustAllContent: false

635

636 # Cache parameters for WebDAV content serving:

637 WebDAVCache:

638 # Time to cache manifests, permission checks, and sessions.

639 TTL: 300s

640

641 # Block cache entries. Each block consumes up to 64 MiB RAM.

642 MaxBlockEntries: 20

643

644 # Approximate memory limit (in bytes) for session cache.

645 #

646 # Note this applies to the in-memory representation of

647 # projects and collections -- metadata, block locators,

648 # filenames, etc. -- excluding cached file content, which is

649 # limited by MaxBlockEntries.

650 MaxCollectionBytes: 100000000

651

652 # Persistent sessions.

653 MaxSessions: 100

654

655 # Selectively set permissions for regular users and admins to

656 # download or upload data files using the upload/download

657 # features for Workbench, WebDAV and S3 API support.

658 WebDAVPermission:

659 User:

660 Download: true

661 Upload: true

662 Admin:

663 Download: true

664 Upload: true

665

666 # Selectively set permissions for regular users and admins to be

667 # able to download or upload blocks using arv-put and

668 # arv-get from outside the cluster.

669 KeepproxyPermission:

670 User:

671 Download: true

672 Upload: true

673 Admin:

674 Download: true

675 Upload: true

676

677 # Post upload / download events to the API server logs table, so

678 # that they can be included in the arv-user-activity report.

679 # You can disable this if you find that it is creating excess

680 # load on the API server and you don't need it.

681 WebDAVLogEvents: true

682

683 Login:

684 # One of the following mechanisms (Google, PAM, LDAP, or

685 # LoginCluster) should be enabled; see

686 # https://doc.arvados.org/install/setup-login.html

687

688 Google:

689 # Authenticate with Google.

690 Enable: false

691

692 # Use the Google Cloud console to enable the People API (APIs

693 # and Services > Enable APIs and services > Google People API

694 # > Enable), generate a Client ID and secret (APIs and

695 # Services > Credentials > Create credentials > OAuth client

696 # ID > Web application) and add your controller's /login URL

697 # (e.g., "https://zzzzz.example.com/login") as an authorized

698 # redirect URL.

699 ClientID: ""

700 ClientSecret: ""

701

702 # Allow users to log in to existing accounts using any verified

703 # email address listed by their Google account. If true, the

704 # Google People API must be enabled in order for Google login to

705 # work. If false, only the primary email address will be used.

706 AlternateEmailAddresses: true

707

708 # Send additional parameters with authentication requests. See

709 # https://developers.google.com/identity/protocols/oauth2/openid-connect#authenticationuriparameters

710 # for a list of supported parameters.

711 AuthenticationRequestParameters:

712 # Show the "choose which Google account" page, even if the

713 # client is currently logged in to exactly one Google

714 # account.

715 prompt: select_account

716

717 SAMPLE: ""

718

719 OpenIDConnect:

720 # Authenticate with an OpenID Connect provider.

721 Enable: false

722

723 # Issuer URL, e.g., "https://login.example.com".

724 #

725 # This must be exactly equal to the URL returned by the issuer

726 # itself in its config response ("issuer" key). If the

727 # configured value is "https://example" and the provider

728 # returns "https://example:443" or "https://example/" then

729 # login will fail, even though those URLs are equivalent

730 # (RFC3986).

731 Issuer: ""

732

733 # Your client ID and client secret (supplied by the provider).

734 ClientID: ""

735 ClientSecret: ""

736

737 # OpenID claim field containing the user's email

738 # address. Normally "email"; see

739 # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims

740 EmailClaim: "email"

741

742 # OpenID claim field containing the email verification

743 # flag. Normally "email_verified". To accept every returned

744 # email address without checking a "verified" field at all,

745 # use the empty string "".

746 EmailVerifiedClaim: "email_verified"

747

748 # OpenID claim field containing the user's preferred

749 # username. If empty, use the mailbox part of the user's email

750 # address.

751 UsernameClaim: ""

752

753 # Send additional parameters with authentication requests,

754 # like {display: page, prompt: consent}. See

755 # https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest

756 # and refer to your provider's documentation for supported

757 # parameters.

758 AuthenticationRequestParameters:

759 SAMPLE: ""

760

761 # Accept an OIDC access token as an API token if the OIDC

762 # provider's UserInfo endpoint accepts it.

763 #

764 # AcceptAccessTokenScope should also be used when enabling

765 # this feature.

766 AcceptAccessToken: false

767

768 # Before accepting an OIDC access token as an API token, first

769 # check that it is a JWT whose "scope" value includes this

770 # value. Example: "https://zzzzz.example.com/" (your Arvados

771 # API endpoint).

772 #

773 # If this value is empty and AcceptAccessToken is true, all

774 # access tokens will be accepted regardless of scope,

775 # including non-JWT tokens. This is not recommended.

776 AcceptAccessTokenScope: ""

777

778 PAM:

779 # Use PAM to authenticate users.

780 Enable: false

781

782 # PAM service name. PAM will apply the policy in the

783 # corresponding config file (e.g., /etc/pam.d/arvados) or, if

784 # there is none, the default "other" config.

785 Service: arvados

786

787 # Domain name (e.g., "example.com") to use to construct the

788 # user's email address if PAM authentication returns a

789 # username with no "@". If empty, use the PAM username as the

790 # user's email address, whether or not it contains "@".

791 #

792 # Note that the email address is used as the primary key for

793 # user records when logging in. Therefore, if you change

794 # DefaultEmailDomain after the initial installation, you

795 # should also update existing user records to reflect the new

796 # domain. Otherwise, next time those users log in, they will

797 # be given new accounts instead of accessing their existing

798 # accounts.

799 DefaultEmailDomain: ""

800

801 LDAP:

802 # Use an LDAP service to authenticate users.

803 Enable: false

804

805 # Server URL, like "ldap://ldapserver.example.com:389" or

806 # "ldaps://ldapserver.example.com:636".

807 URL: "ldap://ldap:389"

808

809 # Use StartTLS upon connecting to the server.

810 StartTLS: true

811

812 # Skip TLS certificate name verification.

813 InsecureTLS: false

814

815 # Minimum TLS version to negotiate when connecting to server

816 # (ldaps://... or StartTLS). It may be necessary to set this

817 # to "1.1" for compatibility with older LDAP servers that fail

818 # with 'LDAP Result Code 200 "Network Error": TLS handshake

819 # failed (tls: server selected unsupported protocol version

820 # 301)'.

821 #

822 # If blank, use the recommended minimum version (1.2).

823 MinTLSVersion: ""

824

825 # Strip the @domain part if a user supplies an email-style

826 # username with this domain. If "*", strip any user-provided

827 # domain. If "", never strip the domain part. Example:

828 # "example.com"

829 StripDomain: ""

830

831 # If, after applying StripDomain, the username contains no "@"

832 # character, append this domain to form an email-style

833 # username. Example: "example.com"

834 AppendDomain: ""

835

836 # The LDAP attribute to filter on when looking up a username

837 # (after applying StripDomain and AppendDomain).

838 SearchAttribute: uid

839

840 # Bind with this username (DN or UPN) and password when

841 # looking up the user record.

842 #

843 # Example user: "cn=admin,dc=example,dc=com"

844 SearchBindUser: ""

845 SearchBindPassword: ""

846

847 # Directory base for username lookup. Example:

848 # "ou=Users,dc=example,dc=com"

849 SearchBase: ""

850

851 # Additional filters to apply when looking up users' LDAP

852 # entries. This can be used to restrict access to a subset of

853 # LDAP users, or to disambiguate users from other directory

854 # entries that have the SearchAttribute present.

855 #

856 # Special characters in assertion values must be escaped (see

857 # RFC4515).

858 #

859 # Example: "(objectClass=person)"

860 SearchFilters: ""

861

862 # LDAP attribute to use as the user's email address.

863 #

864 # Important: This must not be an attribute whose value can be

865 # edited in the directory by the users themselves. Otherwise,

866 # users can take over other users' Arvados accounts trivially

867 # (email address is the primary key for Arvados accounts.)

868 EmailAttribute: mail

869

870 # LDAP attribute to use as the preferred Arvados username. If

871 # no value is found (or this config is empty) the username

872 # originally supplied by the user will be used.

873 UsernameAttribute: uid

874

875 Test:

876 # Authenticate users listed here in the config file. This

877 # feature is intended to be used in test environments, and

878 # should not be used in production.

879 Enable: false

880 Users:

881 SAMPLE:

882 Email: alice@example.com

883 Password: xyzzy

884

885 # The cluster ID to delegate the user database. When set,

886 # logins on this cluster will be redirected to the login cluster

887 # (login cluster must appear in RemoteClusters with Proxy: true)

888 LoginCluster: ""

889

890 # How long a cached token belonging to a remote cluster will

891 # remain valid before it needs to be revalidated.

892 RemoteTokenRefresh: 5m

893

894 # How long a client token created from a login flow will be valid without

895 # asking the user to re-login. Example values: 60m, 8h.

896 # Default value zero means tokens don't have expiration.

897 TokenLifetime: 0s

898

899 # If true (default) tokens issued through login are allowed to create

900 # new tokens.

901 # If false, tokens issued through login are not allowed to

902 # view/create other tokens. New tokens can only be created

903 # by going through login again.

904 IssueTrustedTokens: true

905

906 # Origins (scheme://host[:port]) of clients trusted to receive

907 # new tokens via login process. The ExternalURLs of the local

908 # Workbench1 and Workbench2 are trusted implicitly and do not

909 # need to be listed here. If this is a LoginCluster, you

910 # probably want to include the other Workbench instances in the

911 # federation in this list.

912 #

913 # A wildcard like "https://*.example" will match client URLs

914 # like "https://a.example" and "https://a.b.c.example".

915 #

916 # Example:

917 #

918 # TrustedClients:

919 # "https://workbench.other-cluster.example": {}

920 # "https://workbench2.other-cluster.example": {}

921 TrustedClients:

922 SAMPLE: {}

923

924 # Treat any origin whose host part is "localhost" or a private

925 # IP address (e.g., http://10.0.0.123:3000/) as if it were

926 # listed in TrustedClients.

927 #

928 # Intended only for test/development use. Not appropriate for

929 # production use.

930 TrustPrivateNetworks: false

931

932 Git:

933 # Path to git or gitolite-shell executable. Each authenticated

934 # request will execute this program with the single argument "http-backend"

935 GitCommand: /usr/bin/git

936

937 # Path to Gitolite's home directory. If a non-empty path is given,

938 # the CGI environment will be set up to support the use of

939 # gitolite-shell as a GitCommand: for example, if GitoliteHome is

940 # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,

941 # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.

942 GitoliteHome: ""

943

944 # Git repositories must be readable by api server, or you won't be

945 # able to submit crunch jobs. To pass the test suites, put a clone

946 # of the arvados tree in {git_repositories_dir}/arvados.git or

947 # {git_repositories_dir}/arvados/.git

948 Repositories: /var/lib/arvados/git/repositories

949

950 TLS:

951 # Use "file:///var/lib/acme/live/example.com/cert" and

952 # ".../privkey" to load externally managed certificates.

953 Certificate: ""

954 Key: ""

955

956 # Accept invalid certificates when connecting to servers. Never

957 # use this in production.

958 Insecure: false

959

960 ACME:

961 # Obtain certificates automatically for ExternalURL domains

962 # using an ACME server and http-01 validation.

963 #

964 # To use Let's Encrypt, specify "LE". To use the Let's

965 # Encrypt staging environment, specify "LE-staging". To use a

966 # different ACME server, specify the full directory URL

967 # ("https://...").

968 #

969 # Note: this feature is not yet implemented in released

970 # versions, only in the alpha/prerelease arvados-server-easy

971 # package.

972 #

973 # Implies agreement with the server's terms of service.

974 Server: ""

975

976 Containers:

977 # List of supported Docker Registry image formats that compute nodes

978 # are able to use. `arv keep docker` will error out if a user tries

979 # to store an image with an unsupported format. Use an empty map

980 # to skip the compatibility check (and display a warning message to

981 # that effect).

982 #

983 # Example for sites running docker < 1.10: {"v1": {}}

984 # Example for sites running docker >= 1.10: {"v2": {}}

985 # Example for disabling check: {}

986 SupportedDockerImageFormats:

987 "v2": {}

988 SAMPLE: {}

989

990 # Include details about job reuse decisions in the server log. This

991 # causes additional database queries to run, so it should not be

992 # enabled unless you expect to examine the resulting logs for

993 # troubleshooting purposes.

994 LogReuseDecisions: false

995

996 # Default value for keep_cache_ram of a container's

997 # runtime_constraints. Note: this gets added to the RAM request

998 # used to allocate a VM or submit an HPC job.

999 #

1000 # If this is zero, container requests that don't specify RAM or

1001 # disk cache size will use a disk cache, sized to the

1002 # container's RAM requirement (but with minimum 2 GiB and

1003 # maximum 32 GiB).

1004 #

1005 # Note: If you change this value, containers that used the previous

1006 # default value will only be reused by container requests that

1007 # explicitly specify the previous value in their keep_cache_ram

1008 # runtime constraint.

1009 DefaultKeepCacheRAM: 0

1010

1011 # Number of times a container can be unlocked before being

1012 # automatically cancelled.

1013 MaxDispatchAttempts: 5

1014

1015 # Default value for container_count_max for container requests. This is the

1016 # number of times Arvados will create a new container to satisfy a container

1017 # request. If a container is cancelled it will retry a new container if

1018 # container_count < container_count_max on any container requests associated

1019 # with the cancelled container.

1020 MaxRetryAttempts: 3

1021

1022 # Schedule all child containers on preemptible instances (e.g. AWS

1023 # Spot Instances) even if not requested by the submitter.

1024 #

1025 # If false, containers are scheduled on preemptible instances

1026 # only when requested by the submitter.

1027 #

1028 # This flag is ignored if no preemptible instance types are

1029 # configured, and has no effect on top-level containers.

1030 AlwaysUsePreemptibleInstances: false

1031

1032 # Automatically add a preemptible variant for every

1033 # non-preemptible entry in InstanceTypes below. The maximum bid

1034 # price for the preemptible variant will be the non-preemptible

1035 # price multiplied by PreemptiblePriceFactor. If 0, preemptible

1036 # variants are not added automatically.

1037 #

1038 # A price factor of 1.0 is a reasonable starting point.

1039 PreemptiblePriceFactor: 0

1040

1041 # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the

1042 # cloud dispatcher for executing containers on worker VMs.

1043 # Begins with "-----BEGIN RSA PRIVATE KEY-----\n"

1044 # and ends with "\n-----END RSA PRIVATE KEY-----\n".

1045 DispatchPrivateKey: ""

1046

1047 # Maximum time to wait for workers to come up before abandoning

1048 # stale locks from a previous dispatch process.

1049 StaleLockTimeout: 1m

1050

1051 # The crunch-run command used to start a container on a worker node.

1052 #

1053 # When dispatching to cloud VMs, this is used only if

1054 # DeployRunnerBinary in the CloudVMs section is set to the empty

1055 # string.

1056 CrunchRunCommand: "crunch-run"

1057

1058 # Extra arguments to add to crunch-run invocation

1059 # Example: ["--cgroup-parent-subsystem=memory"]

1060 CrunchRunArgumentsList: []

1061

1062 # Extra RAM to reserve on the node, in addition to

1063 # the amount specified in the container's RuntimeConstraints

1064 ReserveExtraRAM: 550MiB

1065

1066 # Minimum time between two attempts to run the same container

1067 MinRetryPeriod: 0s

1068

1069 # Container runtime: "docker" (default) or "singularity"

1070 RuntimeEngine: docker

1071

1072 # When running a container, run a dedicated keepstore process,

1073 # using the specified number of 64 MiB memory buffers per

1074 # allocated CPU core (VCPUs in the container's runtime

1075 # constraints). The dedicated keepstore handles I/O for

1076 # collections mounted in the container, as well as saving

1077 # container logs.

1078 #

1079 # A zero value disables this feature.

1080 #

1081 # In order for this feature to be activated, no volume may use

1082 # AccessViaHosts, and no writable volume may have Replication

1083 # lower than Collections.DefaultReplication. If these

1084 # requirements are not satisfied, the feature is disabled

1085 # automatically regardless of the value given here.

1086 #

1087 # When an HPC dispatcher is in use (see SLURM and LSF sections),

1088 # this feature depends on the operator to ensure an up-to-date

1089 # cluster configuration file (/etc/arvados/config.yml) is

1090 # available on all compute nodes. If it is missing or not

1091 # readable by the crunch-run user, the feature will be disabled

1092 # automatically. To read it from a different location, add a

1093 # "-config=/path/to/config.yml" argument to

1094 # CrunchRunArgumentsList above.

1095 #

1096 # When the cloud dispatcher is in use (see CloudVMs section) and

1097 # this configuration is enabled, the entire cluster

1098 # configuration file, including the system root token, is copied

1099 # to the worker node and held in memory for the duration of the

1100 # container.

1101 LocalKeepBlobBuffersPerVCPU: 1

1102

1103 # When running a dedicated keepstore process for a container

1104 # (see LocalKeepBlobBuffersPerVCPU), write keepstore log

1105 # messages to keepstore.txt in the container's log collection.

1106 #

1107 # These log messages can reveal some volume configuration

1108 # details, error messages from the cloud storage provider, etc.,

1109 # which are not otherwise visible to users.

1110 #

1111 # Accepted values:

1112 # * "none" -- no keepstore.txt file

1113 # * "all" -- all logs, including request and response lines

1114 # * "errors" -- all logs except "response" logs with 2xx

1115 # response codes and "request" logs

1116 LocalKeepLogsToContainerLog: none

1117

1118 Logging:

1119 # Periodically (see SweepInterval) Arvados will check for

1120 # containers that have been finished for at least this long,

1121 # and delete their stdout, stderr, arv-mount, crunch-run, and

1122 # crunchstat logs from the logs table.

1123 MaxAge: 720h

1124

1125 # How often to delete cached log entries for finished

1126 # containers (see MaxAge).

1127 SweepInterval: 12h

1128

1129 # These two settings control how frequently log events are flushed to the

1130 # database. Log lines are buffered until either LogBytesPerEvent

1131 # has been reached or LogSecondsBetweenEvents has elapsed since

1132 # the last flush.

1133 LogBytesPerEvent: 4096

1134 LogSecondsBetweenEvents: 5s

1135

1136 # The sample period for throttling logs.

1137 LogThrottlePeriod: 60s

1138

1139 # Maximum number of bytes that a job can log over LogThrottlePeriod

1140 # before being silenced until the end of the period.

1141 LogThrottleBytes: 65536

1142

1143 # Maximum number of lines that a job can log over LogThrottlePeriod

1144 # before being silenced until the end of the period.

1145 LogThrottleLines: 1024

1146

1147 # Maximum bytes that may be logged by a single job. Log bytes that are

1148 # silenced by throttling are not counted against this total.

1149 LimitLogBytesPerJob: 67108864

1150

1151 LogPartialLineThrottlePeriod: 5s

1152

1153 # Container logs are written to Keep and saved in a

1154 # collection, which is updated periodically while the

1155 # container runs. This value sets the interval between

1156 # collection updates.

1157 LogUpdatePeriod: 30m

1158

1159 # The log collection is also updated when the specified amount of

1160 # log data (given in bytes) is produced in less than one update

1161 # period.

1162 LogUpdateSize: 32MiB

1163

1164 ShellAccess:

1165 # An admin user can use "arvados-client shell" to start an

1166 # interactive shell (with any user ID) in any running

1167 # container.

1168 Admin: false

1169

1170 # Any user can use "arvados-client shell" to start an

1171 # interactive shell (with any user ID) in any running

1172 # container that they started, provided it isn't also

1173 # associated with a different user's container request.

1174 #

1175 # Interactive sessions make it easy to alter the container's

1176 # runtime environment in ways that aren't recorded or

1177 # reproducible. Consider the implications for automatic

1178 # container reuse before enabling and using this feature. In

1179 # particular, note that starting an interactive session does

1180 # not disqualify a container from being reused by a different

1181 # user/workflow in the future.

1182 User: false

1183

1184 SLURM:

1185 PrioritySpread: 0

1186 SbatchArgumentsList: []

1187 SbatchEnvironmentVariables:

1188 SAMPLE: ""

1189 Managed:

1190 # Path to dns server configuration directory

1191 # (e.g. /etc/unbound.d/conf.d). If false, do not write any config

1192 # files or touch restart.txt (see below).

1193 DNSServerConfDir: ""

1194

1195 # Template file for the dns server host snippets. See

1196 # unbound.template in this directory for an example. If false, do

1197 # not write any config files.

1198 DNSServerConfTemplate: ""

1199

1200 # String to write to {dns_server_conf_dir}/restart.txt (with a

1201 # trailing newline) after updating local data. If false, do not

1202 # open or write the restart.txt file.

1203 DNSServerReloadCommand: ""

1204

1205 # Command to run after each DNS update. Template variables will be

1206 # substituted; see the "unbound" example below. If false, do not run

1207 # a command.

1208 DNSServerUpdateCommand: ""

1209

1210 ComputeNodeDomain: ""

1211 ComputeNodeNameservers:

1212 "192.168.1.1": {}

1213 SAMPLE: {}

1214

1215 # Hostname to assign to a compute node when it sends a "ping" and the

1216 # hostname in its Node record is nil.

1217 # During bootstrapping, the "ping" script is expected to notice the

1218 # hostname given in the ping response, and update its unix hostname

1219 # accordingly.

1220 # If false, leave the hostname alone (this is appropriate if your compute

1221 # nodes' hostnames are already assigned by some other mechanism).

1222 #

1223 # One way or another, the hostnames of your node records should agree

1224 # with your DNS records and your /etc/slurm-llnl/slurm.conf files.

1225 #

1226 # Example for compute0000, compute0001, ....:

1227 # AssignNodeHostname: compute%<slot_number>04d

1228 # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)

1229 AssignNodeHostname: "compute%<slot_number>d"

1230

1231 LSF:

1232 # Arguments to bsub when submitting Arvados containers as LSF jobs.

1233 #

1234 # Template variables starting with % will be substituted as follows:

1235 #

1236 # %U uuid

1237 # %C number of VCPUs

1238 # %M memory in MB

1239 # %T tmp in MB

1240 # %G number of GPU devices (runtime_constraints.cuda.device_count)

1241 #

1242 # Use %% to express a literal %. The %%J in the default will be changed

1243 # to %J, which is interpreted by bsub itself.

1244 #

1245 # Note that the default arguments cause LSF to write two files

1246 # in /tmp on the compute node each time an Arvados container

1247 # runs. Ensure you have something in place to delete old files

1248 # from /tmp, or adjust the "-o" and "-e" arguments accordingly.

1249 BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]

1250

1251 # Arguments that will be appended to the bsub command line

1252 # when submitting Arvados containers as LSF jobs with

1253 # runtime_constraints.cuda.device_count > 0

1254 BsubCUDAArguments: ["-gpu", "num=%G"]

1255

1256 # Use sudo to switch to this user account when submitting LSF

1257 # jobs.

1258 #

1259 # This account must exist on the hosts where LSF jobs run

1260 # ("execution hosts"), as well as on the host where the

1261 # Arvados LSF dispatcher runs ("submission host").

1262 BsubSudoUser: "crunch"

1263

1264 JobsAPI:

1265 # Enable the legacy 'jobs' API (crunch v1). This value must be a string.

1266 #

1267 # Note: this only enables read-only access, creating new

1268 # legacy jobs and pipelines is not supported.

1269 #

1270 # 'auto' -- (default) enable the Jobs API only if it has been used before

1271 # (i.e., there are job records in the database)

1272 # 'true' -- enable the Jobs API despite lack of existing records.

1273 # 'false' -- disable the Jobs API despite presence of existing records.

1274 Enable: 'auto'

1275

1276 # Git repositories must be readable by api server, or you won't be

1277 # able to submit crunch jobs. To pass the test suites, put a clone

1278 # of the arvados tree in {git_repositories_dir}/arvados.git or

1279 # {git_repositories_dir}/arvados/.git

1280 GitInternalDir: /var/lib/arvados/internal.git

1281

1282 CloudVMs:

1283 # Enable the cloud scheduler.

1284 Enable: false

1285

1286 # Name/number of port where workers' SSH services listen.

1287 SSHPort: "22"

1288

1289 # Interval between queue polls.

1290 PollInterval: 10s

1291

1292 # Shell command to execute on each worker to determine whether

1293 # the worker is booted and ready to run containers. It should

1294 # exit zero if the worker is ready.

1295 BootProbeCommand: "systemctl is-system-running"

1296

1297 # Minimum interval between consecutive probes to a single

1298 # worker.

1299 ProbeInterval: 10s

1300

1301 # Maximum probes per second, across all workers in a pool.

1302 MaxProbesPerSecond: 10

1303

1304 # Time before repeating SIGTERM when killing a container.

1305 TimeoutSignal: 5s

1306

1307 # Time to give up on a process (most likely arv-mount) that

1308 # still holds a container lockfile after its main supervisor

1309 # process has exited, and declare the instance broken.

1310 TimeoutStaleRunLock: 5s

1311

1312 # Time to give up on SIGTERM and write off the worker.

1313 TimeoutTERM: 2m

1314

1315 # Maximum create/destroy-instance operations per second (0 =

1316 # unlimited).

1317 MaxCloudOpsPerSecond: 10

1318

1319 # Maximum concurrent instance creation operations (0 = unlimited).

1320 #

1321 # MaxConcurrentInstanceCreateOps limits the number of instance creation

1322 # requests that can be in flight at any one time, whereas

1323 # MaxCloudOpsPerSecond limits the number of create/destroy operations

1324 # that can be started per second.

1325 #

1326 # Because the API for instance creation on Azure is synchronous, it is

1327 # recommended to increase MaxConcurrentInstanceCreateOps when running

1328 # on Azure. When using managed images, a value of 20 would be

1329 # appropriate. When using Azure Shared Image Galleries, it could be set

1330 # higher. For more information, see

1331 # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image

1332 #

1333 # MaxConcurrentInstanceCreateOps can be increased for other cloud

1334 # providers too, if desired.

1335 MaxConcurrentInstanceCreateOps: 1

1336

1337 # The maximum number of instances to run at a time, or 0 for

1338 # unlimited.

1339 #

1340 # If more instances than this are already running and busy

1341 # when the dispatcher starts up, the running containers will

1342 # be allowed to finish before the excess instances are shut

1343 # down.

1344 MaxInstances: 64

1345

1346 # Maximum fraction of CloudVMs.MaxInstances allowed to run

1347 # "supervisor" containers at any given time. A supervisor is a

1348 # container whose purpose is mainly to submit and manage other

1349 # containers, such as arvados-cwl-runner workflow runner.

1350 #

1351 # If there is a hard limit on the amount of concurrent

1352 # containers that the cluster can run, it is important to

1353 # avoid crowding out the containers doing useful work with

1354 # containers that just create more work.

1355 #

1356 # For example, with the default MaxInstances of 64, it will

1357 # schedule at most floor(64*0.30) = 19 concurrent workflows,

1358 # ensuring 45 slots are available for work.

1359 SupervisorFraction: 0.30

1360

1361 # Interval between cloud provider syncs/updates ("list all

1362 # instances").

1363 SyncInterval: 1m

1364

1365 # Time to leave an idle worker running (in case new containers

1366 # appear in the queue that it can run) before shutting it

1367 # down.

1368 TimeoutIdle: 1m

1369

1370 # Time to wait for a new worker to boot (i.e., pass

1371 # BootProbeCommand) before giving up and shutting it down.

1372 TimeoutBooting: 10m

1373

1374 # Maximum time a worker can stay alive with no successful

1375 # probes before being automatically shut down.

1376 TimeoutProbe: 10m

1377

1378 # Time after shutting down a worker to retry the

1379 # shutdown/destroy operation.

1380 TimeoutShutdown: 10s

1381

1382 # Worker VM image ID.

1383 # (aws) AMI identifier

1384 # (azure) managed disks: the name of the managed disk image

1385 # (azure) shared image gallery: the name of the image definition. Also

1386 # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.

1387 # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.

1388 # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd

1389 ImageID: ""

1390

1391 # An executable file (located on the dispatcher host) to be

1392 # copied to cloud instances at runtime and used as the

1393 # container runner/supervisor. The default value is the

1394 # dispatcher program itself.

1395 #

1396 # Use the empty string to disable this step: nothing will be

1397 # copied, and cloud instances are assumed to have a suitable

1398 # version of crunch-run installed; see CrunchRunCommand above.

1399 DeployRunnerBinary: "/proc/self/exe"

1400

1401 # Tags to add on all resources (VMs, NICs, disks) created by

1402 # the container dispatcher. (Arvados's own tags --

1403 # InstanceType, IdleBehavior, and InstanceSecret -- will also

1404 # be added.)

1405 ResourceTags:

1406 SAMPLE: "tag value"

1407

1408 # Prefix for predefined tags used by Arvados (InstanceSetID,

1409 # InstanceType, InstanceSecret, IdleBehavior). With the

1410 # default value "Arvados", tags are "ArvadosInstanceSetID",

1411 # "ArvadosInstanceSecret", etc.

1412 #

1413 # This should only be changed while no cloud resources are in

1414 # use and the cloud dispatcher is not running. Otherwise,

1415 # VMs/resources that were added using the old tag prefix will

1416 # need to be detected and cleaned up manually.

1417 TagKeyPrefix: Arvados

1418

1419 # Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS),

1420 # or "loopback" (run containers on dispatch host for testing

1421 # purposes).

1422 Driver: ec2

1423

1424 # Cloud-specific driver parameters.

1425 DriverParameters:

1426

1427 # (ec2) Credentials. Omit or leave blank if using IAM role.

1428 AccessKeyID: ""

1429 SecretAccessKey: ""

1430

1431 # (ec2) Instance configuration.

1432 SecurityGroupIDs:

1433 "SAMPLE": {}

1434 SubnetID: ""

1435 Region: ""

1436 EBSVolumeType: gp2

1437 AdminUsername: debian

1438 # (ec2) name of the IAMInstanceProfile for instances started by

1439 # the cloud dispatcher. Leave blank when not needed.

1440 IAMInstanceProfile: ""

1441

1442 # (ec2) how often to look up spot instance pricing data

1443 # (only while running spot instances) for the purpose of

1444 # calculating container cost estimates. A value of 0

1445 # disables spot price lookups entirely.

1446 SpotPriceUpdateInterval: 24h

1447

1448 # (ec2) per-GiB-month cost of EBS volumes. Matches

1449 # EBSVolumeType. Used to account for AddedScratch when

1450 # calculating container cost estimates. Note that

1451 # https://aws.amazon.com/ebs/pricing/ defines GB to mean

1452 # GiB, so an advertised price $0.10/GB indicates a real

1453 # price of $0.10/GiB and can be entered here as 0.10.

1454 EBSPrice: 0.10

1455

1456 # (azure) Credentials.

1457 SubscriptionID: ""

1458 ClientID: ""

1459 ClientSecret: ""

1460 TenantID: ""

1461

1462 # (azure) Instance configuration.

1463 CloudEnvironment: AzurePublicCloud

1464 Location: centralus

1465

1466 # (azure) The resource group where the VM and virtual NIC will be

1467 # created.

1468 ResourceGroup: ""

1469

1470 # (azure) The resource group of the Network to use for the virtual

1471 # NIC (if different from ResourceGroup)

1472 NetworkResourceGroup: ""

1473 Network: ""

1474 Subnet: ""

1475

1476 # (azure) managed disks: The resource group where the managed disk

1477 # image can be found (if different from ResourceGroup).

1478 ImageResourceGroup: ""

1479

1480 # (azure) shared image gallery: the name of the gallery

1481 SharedImageGalleryName: ""

1482 # (azure) shared image gallery: the version of the image definition

1483 SharedImageGalleryImageVersion: ""

1484

1485 # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs

1486 StorageAccount: ""

1487 BlobContainer: ""

1488

1489 # (azure) How long to wait before deleting VHD and NIC

1490 # objects that are no longer being used.

1491 DeleteDanglingResourcesAfter: 20s

1492

1493 # Account (that already exists in the VM image) that will be

1494 # set up with an ssh authorized key to allow the compute

1495 # dispatcher to connect.

1496 AdminUsername: arvados

1497

1498 InstanceTypes:

1499

1500 # Use the instance type name as the key (in place of "SAMPLE" in

1501 # this sample entry).

1502 SAMPLE:

1503 # Cloud provider's instance type. Defaults to the configured type name.

1504 ProviderType: ""

1505 VCPUs: 1

1506 RAM: 128MiB

1507 IncludedScratch: 16GB

1508 AddedScratch: 0

1509 # Hourly price ($), used to select node types for containers,

1510 # and to calculate estimated container costs. For spot

1511 # instances on EC2, this is also used as the maximum price

1512 # when launching spot instances, while the estimated container

1513 # cost is computed based on the current spot price according

1514 # to AWS. On Azure, and on-demand instances on EC2, the price

1515 # given here is used to compute container cost estimates.

1516 Price: 0.1

1517 Preemptible: false

1518 # Include this section if the node type includes GPU (CUDA) support

1519 CUDA:

1520 DriverVersion: "11.0"

1521 HardwareCapability: "9.0"

1522 DeviceCount: 1

1523

1524 StorageClasses:

1525

1526 # If you use multiple storage classes, specify them here, using

1527 # the storage class name as the key (in place of "SAMPLE" in

1528 # this sample entry).

1529 #

1530 # Further info/examples:

1531 # https://doc.arvados.org/admin/storage-classes.html

1532 SAMPLE:

1533

1534 # Priority determines the order volumes should be searched

1535 # when reading data, in cases where a keepstore server has

1536 # access to multiple volumes with different storage classes.

1537 Priority: 0

1538

1539 # Default determines which storage class(es) should be used

1540 # when a user/client writes data or saves a new collection

1541 # without specifying storage classes.

1542 #

1543 # If any StorageClasses are configured, at least one of them

1544 # must have Default: true.

1545 Default: true

1546

1547 Volumes:

1548 SAMPLE:

1549 # AccessViaHosts specifies which keepstore processes can read

1550 # and write data on the volume.

1551 #

1552 # For a local filesystem, AccessViaHosts has one entry,

1553 # indicating which server the filesystem is located on.

1554 #

1555 # For a network-attached backend accessible by all keepstore

1556 # servers, like a cloud storage bucket or an NFS mount,

1557 # AccessViaHosts can be empty/omitted.

1558 #

1559 # Further info/examples:

1560 # https://doc.arvados.org/install/configure-fs-storage.html

1561 # https://doc.arvados.org/install/configure-s3-object-storage.html

1562 # https://doc.arvados.org/install/configure-azure-blob-storage.html

1563 AccessViaHosts:

1564 SAMPLE:

1565 ReadOnly: false

1566 "http://host1.example:25107": {}

1567 ReadOnly: false

1568 Replication: 1

1569 StorageClasses:

1570 # If you have configured storage classes (see StorageClasses

1571 # section above), add an entry here for each storage class

1572 # satisfied by this volume.

1573 SAMPLE: true

1574 Driver: S3

1575 DriverParameters:

1576 # for s3 driver -- see

1577 # https://doc.arvados.org/install/configure-s3-object-storage.html

1578 IAMRole: aaaaa

1579 AccessKeyID: aaaaa

1580 SecretAccessKey: aaaaa

1581 Endpoint: ""

1582 Region: us-east-1

1583 Bucket: aaaaa

1584 LocationConstraint: false

1585 V2Signature: false

1586 IndexPageSize: 1000

1587 ConnectTimeout: 1m

1588 ReadTimeout: 10m

1589 RaceWindow: 24h

1590 PrefixLength: 0

1591

1592 # For S3 driver, potentially unsafe tuning parameter,

1593 # intentionally excluded from main documentation.

1594 #

1595 # Enable deletion (garbage collection) even when the

1596 # configured BlobTrashLifetime is zero. WARNING: eventual

1597 # consistency may result in race conditions that can cause

1598 # data loss. Do not enable this unless you understand and

1599 # accept the risk.

1600 UnsafeDelete: false

1601

1602 # for azure driver -- see

1603 # https://doc.arvados.org/install/configure-azure-blob-storage.html

1604 StorageAccountName: aaaaa

1605 StorageAccountKey: aaaaa

1606 StorageBaseURL: core.windows.net

1607 ContainerName: aaaaa

1608 RequestTimeout: 30s

1609 ListBlobsRetryDelay: 10s

1610 ListBlobsMaxAttempts: 10

1611 MaxGetBytes: 0

1612 WriteRaceInterval: 15s

1613 WriteRacePollTime: 1s

1614

1615 # for local directory driver -- see

1616 # https://doc.arvados.org/install/configure-fs-storage.html

1617 Root: /var/lib/arvados/keep-data

1618

1619 # For local directory driver, potentially confusing tuning

1620 # parameter, intentionally excluded from main documentation.

1621 #

1622 # When true, read and write operations (for whole 64MiB

1623 # blocks) on an individual volume will be queued and issued

1624 # serially. When false, read and write operations will be

1625 # issued concurrently.

1626 #

1627 # May possibly improve throughput if you have physical spinning disks

1628 # and experience contention when there are multiple requests

1629 # to the same volume.

1630 #

1631 # Otherwise, when using SSDs, RAID, or a shared network filesystem, you

1632 # should leave this alone.

1633 Serialize: false

1634

1635 Mail:

1636 MailchimpAPIKey: ""

1637 MailchimpListID: ""

1638 SendUserSetupNotificationEmail: true

1639

1640 # Bug/issue report notification to and from addresses

1641 IssueReporterEmailFrom: "arvados@example.com"

1642 IssueReporterEmailTo: "arvados@example.com"

1643 SupportEmailAddress: "arvados@example.com"

1644

1645 # Generic issue email from

1646 EmailFrom: "arvados@example.com"

1647 RemoteClusters:

1648 "*":

1649 Host: ""

1650 Proxy: false

1651 Scheme: https

1652 Insecure: false

1653 ActivateUsers: false

1654 SAMPLE:

1655 # API endpoint host or host:port; default is {id}.arvadosapi.com

1656 Host: sample.arvadosapi.com

1657

1658 # Perform a proxy request when a local client requests an

1659 # object belonging to this remote.

1660 Proxy: false

1661

1662 # Default "https". Can be set to "http" for testing.

1663 Scheme: https

1664

1665 # Disable TLS verify. Can be set to true for testing.

1666 Insecure: false

1667

1668 # When users present tokens issued by this remote cluster, and

1669 # their accounts are active on the remote cluster, activate

1670 # them on this cluster too.

1671 ActivateUsers: false

1672

1673 Workbench:

1674 # Workbench1 configs

1675 Theme: default

1676 ActivationContactLink: mailto:info@arvados.org

1677 ArvadosDocsite: https://doc.arvados.org

1678 ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public

1679 ShowUserAgreementInline: false

1680 SecretKeyBase: ""

1681

1682 # Set this configuration to true to avoid providing an easy way for users

1683 # to share data with unauthenticated users; this may be necessary on

1684 # installations where strict data access controls are needed.

1685 DisableSharingURLsUI: false

1686

1687 # Scratch directory used by the remote repository browsing

1688 # feature. If it doesn't exist, it (and any missing parents) will be

1689 # created using mkdir_p.

1690 RepositoryCache: /var/www/arvados-workbench/current/tmp/git

1691

1692 # Below is a sample setting of the UserProfileFormFields config parameter.

1693 # This configuration parameter should be set to either false (to disable) or

1694 # to a map as shown below.

1695 # Configure the map of input fields to be displayed in the profile page

1696 # using the attribute "key" for each of the input fields.

1697 # This sample shows configuration with one required and one optional form field.

1698 # For each of these input fields:

1699 # You can specify "Type" as "text" or "select".

1700 # List the "Options" to be displayed for each "select" menu.

1701 # Set "Required" as "true" for any of these fields to make them required.

1702 # If any of the required fields are missing in the user's profile, the user will be

1703 # redirected to the profile page before they can access any Workbench features.

1704 UserProfileFormFields:

1705 SAMPLE:

1706 Type: select

1707 FormFieldTitle: Best color

1708 FormFieldDescription: your favorite color

1709 Required: false

1710 Position: 1

1711 Options:

1712 red: {}

1713 blue: {}

1714 green: {}

1715 SAMPLE: {}

1716

1717 # exampleTextValue: # key that will be set in properties

1718 # Type: text #

1719 # FormFieldTitle: ""

1720 # FormFieldDescription: ""

1721 # Required: true

1722 # Position: 1

1723 # exampleOptionsValue:

1724 # Type: select

1725 # FormFieldTitle: ""

1726 # FormFieldDescription: ""

1727 # Required: true

1728 # Position: 1

1729 # Options:

1730 # red: {}

1731 # blue: {}

1732 # yellow: {}

1733

1734 # Use "UserProfileFormMessage" to configure the message you want

1735 # to display on the profile page.

1736 UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'

1737

1738 # Mimetypes of applications for which the view icon

1739 # would be enabled in a collection's show page.

1740 # It is sufficient to list only applications here.

1741 # No need to list text and image types.

1742 ApplicationMimetypesWithViewIcon:

1743 cwl: {}

1744 fasta: {}

1745 go: {}

1746 javascript: {}

1747 json: {}

1748 pdf: {}

1749 python: {}

1750 x-python: {}

1751 r: {}

1752 rtf: {}

1753 sam: {}

1754 x-sh: {}

1755 vnd.realvnc.bed: {}

1756 xml: {}

1757 xsl: {}

1758 SAMPLE: {}

1759

1760 # The maximum number of bytes to load in the log viewer

1761 LogViewerMaxBytes: 1M

1762

1763 # When anonymous_user_token is configured, show public projects page

1764 EnablePublicProjectsPage: true

1765

1766 # By default, disable the "Getting Started" popup which is specific to Arvados playground

1767 EnableGettingStartedPopup: false

1768

1769 # Ask Arvados API server to compress its response payloads.

1770 APIResponseCompression: true

1771

1772 # Timeouts for API requests.

1773 APIClientConnectTimeout: 2m

1774 APIClientReceiveTimeout: 5m

1775

1776 # Maximum number of historic log records of a running job to fetch

1777 # and display in the Log tab, while subscribing to web sockets.

1778 RunningJobLogRecordsToFetch: 2000

1779

1780 # In systems with many shared projects, loading of dashboard and topnav

1781 # can be slow due to collections indexing; use the following parameters

1782 # to suppress these properties

1783 ShowRecentCollectionsOnDashboard: true

1784 ShowUserNotifications: true

1785

1786 # Enable/disable "multi-site search" in top nav ("true"/"false"), or

1787 # a link to the multi-site search page on a "home" Workbench site.

1788 #

1789 # Example:

1790 # https://workbench.zzzzz.arvadosapi.com/collections/multisite

1791 MultiSiteSearch: ""

1792

1793 # Should workbench allow management of local git repositories? Set to false if

1794 # the jobs api is disabled and there are no local git repositories.

1795 Repositories: true

1796

1797 SiteName: Arvados Workbench

1798 ProfilingEnabled: false

1799

1800 # This is related to obsolete Google OpenID 1.0 login

1801 # but some workbench stuff still expects it to be set.

1802 DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id"

1803

1804 # Workbench2 configs

1805 FileViewersConfigURL: ""

1806

1807 # Idle time after which the user's session will be auto closed.

1808 # This feature is disabled when set to zero.

1809 IdleTimeout: 0s

1810

1811 # UUID of a collection. This collection should be shared with

1812 # all users. Workbench will look for a file "banner.html" in

1813 # this collection and display its contents (should be

1814 # HTML-formatted text) when users first log in to Workbench.

1815 BannerUUID: ""

1816

1817 # Workbench welcome screen, this is HTML text that will be

1818 # incorporated directly onto the page.

1819 WelcomePageHTML: |

1820 <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />

1821 <h2>Please log in.</h2>

1822

1823 <p>If you have never used Arvados Workbench before, logging in

1824 for the first time will automatically create a new

1825 account.</p>

1826

1827 <i>Arvados Workbench uses your information only for

1828 identification, and does not retrieve any other personal

1829 information.</i>

1830

1831 # Workbench screen displayed to inactive users. This is HTML

1832 # text that will be incorporated directly onto the page.

1833 InactivePageHTML: |

1834 <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />

1835 <h3>Hi! You're logged in, but...</h3>

1836 <p>Your account is inactive.</p>

1837 <p>An administrator must activate your account before you can get

1838 any further.</p>

1839

1840 # Connecting to Arvados shell VMs tends to be site-specific.

1841 # Put any special instructions here. This is HTML text that will

1842 # be incorporated directly onto the Workbench page.

1843 SSHHelpPageHTML: |

1844 <a href="https://doc.arvados.org/user/getting_started/ssh-access-unix.html">Accessing an Arvados VM with SSH</a> (generic instructions).

1845 Site configurations vary. Contact your local cluster administrator if you have difficulty accessing an Arvados shell node.

1846

1847 # Sample text if you are using a "switchyard" ssh proxy.

1848 # Replace "zzzzz" with your Cluster ID.

1849 #SSHHelpPageHTML: |

1850 # <p>Add a section like this to your SSH configuration file ( <i>~/.ssh/config</i>):</p>

1851 # <pre>Host *.zzzzz

1852 # TCPKeepAlive yes

1853 # ServerAliveInterval 60

1854 # ProxyCommand ssh -p2222 turnout@switchyard.zzzzz.arvadosapi.com -x -a $SSH_PROXY_FLAGS %h

1855 # </pre>

1856

1857 # If you are using a switchyard ssh proxy, shell node hostnames

1858 # may require a special hostname suffix. In the sample ssh

1859 # configuration above, this would be ".zzzzz"

1860 # This is added to the hostname in the "command line" column

1861 # of the Workbench "shell VMs" page.

1862 #

1863 # If your shell nodes are directly accessible by users without a

1864 # proxy and have fully qualified host names, you should leave

1865 # this blank.

1866 SSHHelpHostSuffix: ""

1867

1868 # (Experimental) Restart services automatically when config file

1869 # changes are detected. Only supported by `arvados-server boot` in

1870 # dev/test mode.

1871 AutoReloadConfig: false