git.arvados.org - arvados.git/blob - lib/config/config.default.yml

2 #

3 # SPDX-License-Identifier: AGPL-3.0

4

5 # Do not use this file for site configuration. Create

6 # /etc/arvados/config.yml instead.

7 #

8 # The order of precedence (highest to lowest):

9 # 1. Legacy component-specific config files (deprecated)

10 # 2. /etc/arvados/config.yml

11 # 3. config.default.yml

12

13 Clusters:

14 xxxxx:

15 # Token used internally by Arvados components to authenticate to

16 # one another. Use a string of at least 50 random alphanumerics.

17 SystemRootToken: ""

18

19 # Token to be included in all healthcheck requests. Disabled by default.

20 # Server expects request header of the format "Authorization: Bearer xxx"

21 ManagementToken: ""

22

23 Services:

24

25 # Each of the service sections below specifies InternalURLs

26 # (each with optional ListenURL) and ExternalURL.

27 #

28 # InternalURLs specify how other Arvados service processes will

29 # connect to the service. Typically these use internal hostnames

30 # and high port numbers. Example:

31 #

32 # InternalURLs:

33 # "http://host1.internal.example:12345": {}

34 # "http://host2.internal.example:12345": {}

35 #

36 # ListenURL specifies the address and port the service process's

37 # HTTP server should listen on, if different from the

38 # InternalURL itself. Example, using an intermediate TLS proxy:

39 #

40 # InternalURLs:

41 # "https://host1.internal.example":

42 # ListenURL: "http://10.0.0.7:12345"

43 #

44 # When there are multiple InternalURLs configured, the service

45 # process will try listening on each InternalURLs (using

46 # ListenURL if provided) until one works. If you use a ListenURL

47 # like "0.0.0.0" which can be bound on any machine, use an

48 # environment variable

49 # ARVADOS_SERVICE_INTERNAL_URL=http://host1.internal.example to

50 # control which entry to use.

51 #

52 # ExternalURL specifies how applications/clients will connect to

53 # the service, regardless of whether they are inside or outside

54 # the cluster. Example:

55 #

56 # ExternalURL: "https://keep.zzzzz.example.com/"

57 #

58 # To avoid routing internal traffic through external networks,

59 # use split-horizon DNS for ExternalURL host names: inside the

60 # cluster's private network "host.zzzzz.example.com" resolves to

61 # the host's private IP address, while outside the cluster

62 # "host.zzzzz.example.com" resolves to the host's public IP

63 # address (or its external gateway or load balancer).

64

65 RailsAPI:

66 InternalURLs: {SAMPLE: {ListenURL: ""}}

67 ExternalURL: ""

68 Controller:

69 InternalURLs: {SAMPLE: {ListenURL: ""}}

70 ExternalURL: ""

71 Websocket:

72 InternalURLs: {SAMPLE: {ListenURL: ""}}

73 ExternalURL: ""

74 Keepbalance:

75 InternalURLs: {SAMPLE: {ListenURL: ""}}

76 ExternalURL: ""

77 GitHTTP:

78 InternalURLs: {SAMPLE: {ListenURL: ""}}

79 ExternalURL: ""

80 GitSSH:

81 InternalURLs: {SAMPLE: {ListenURL: ""}}

82 ExternalURL: ""

83 DispatchCloud:

84 InternalURLs: {SAMPLE: {ListenURL: ""}}

85 ExternalURL: ""

86 DispatchLSF:

87 InternalURLs: {SAMPLE: {ListenURL: ""}}

88 ExternalURL: ""

89 DispatchSLURM:

90 InternalURLs: {SAMPLE: {ListenURL: ""}}

91 ExternalURL: ""

92 Keepproxy:

93 InternalURLs: {SAMPLE: {ListenURL: ""}}

94 ExternalURL: ""

95 WebDAV:

96 InternalURLs: {SAMPLE: {ListenURL: ""}}

97 # Base URL for Workbench inline preview. If blank, use

98 # WebDAVDownload instead, and disable inline preview.

99 # If both are empty, downloading collections from workbench

100 # will be impossible.

101 #

102 # It is important to properly configure the download service

103 # to mitigate cross-site-scripting (XSS) attacks. An HTML page

104 # can be stored in a collection. If an attacker causes a victim

105 # to visit that page through Workbench, it will be rendered by

106 # the browser. If all collections are served at the same

107 # domain, the browser will consider collections as coming from

108 # the same origin and having access to the same browsing data,

109 # enabling malicious Javascript on that page to access Arvados

110 # on behalf of the victim.

111 #

112 # This is mitigated by having separate domains for each

113 # collection, or limiting preview to circumstances where the

114 # collection is not accessed with the user's regular

115 # full-access token.

116 #

117 # Serve preview links using uuid or pdh in subdomain

118 # (requires wildcard DNS and TLS certificate)

119 # https://*.collections.uuid_prefix.arvadosapi.com

120 #

121 # Serve preview links using uuid or pdh in main domain

122 # (requires wildcard DNS and TLS certificate)

123 # https://*--collections.uuid_prefix.arvadosapi.com

124 #

125 # Serve preview links by setting uuid or pdh in the path.

126 # This configuration only allows previews of public data or

127 # collection-sharing links, because these use the anonymous

128 # user token or the token is already embedded in the URL.

129 # Other data must be handled as downloads via WebDAVDownload:

130 # https://collections.uuid_prefix.arvadosapi.com

131 #

132 ExternalURL: ""

133

134 WebDAVDownload:

135 InternalURLs: {SAMPLE: {ListenURL: ""}}

136 # Base URL for download links. If blank, serve links to WebDAV

137 # with disposition=attachment query param. Unlike preview links,

138 # browsers do not render attachments, so there is no risk of XSS.

139 #

140 # If WebDAVDownload is blank, and WebDAV uses a

141 # single-origin form, then Workbench will show an error page.

142 #

143 # Serve download links by setting uuid or pdh in the path:

144 # https://download.uuid_prefix.arvadosapi.com

145 #

146 ExternalURL: ""

147

148 Keepstore:

149 InternalURLs:

150 SAMPLE:

151 ListenURL: ""

152 # Rendezvous is normally empty/omitted. When changing the

153 # URL of a Keepstore service, Rendezvous should be set to

154 # the old URL (with trailing slash omitted) to preserve

155 # rendezvous ordering.

156 Rendezvous: ""

157 ExternalURL: ""

158 Composer:

159 InternalURLs: {SAMPLE: {ListenURL: ""}}

160 ExternalURL: ""

161 WebShell:

162 InternalURLs: {SAMPLE: {ListenURL: ""}}

163 # ShellInABox service endpoint URL for a given VM. If empty, do not

164 # offer web shell logins.

165 #

166 # E.g., using a path-based proxy server to forward connections to shell hosts:

167 # https://webshell.uuid_prefix.arvadosapi.com

168 #

169 # E.g., using a name-based proxy server to forward connections to shell hosts:

170 # https://*.webshell.uuid_prefix.arvadosapi.com

171 ExternalURL: ""

172 Workbench1:

173 InternalURLs: {SAMPLE: {ListenURL: ""}}

174 ExternalURL: ""

175 Workbench2:

176 InternalURLs: {SAMPLE: {ListenURL: ""}}

177 ExternalURL: ""

178 Health:

179 InternalURLs: {SAMPLE: {ListenURL: ""}}

180 ExternalURL: ""

181

182 PostgreSQL:

183 # max concurrent connections per arvados server daemon

184 ConnectionPool: 32

185 Connection:

186 # All parameters here are passed to the PG client library in a connection string;

187 # see https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS

188 host: ""

189 port: ""

190 user: ""

191 password: ""

192 dbname: ""

193 SAMPLE: ""

194 API:

195 # Limits for how long a client token created by regular users can be valid,

196 # and also is used as a default expiration policy when no expiration date is

197 # specified.

198 # Default value zero means token expirations don't get clamped and no

199 # default expiration is set.

200 MaxTokenLifetime: 0s

201

202 # Maximum size (in bytes) allowed for a single API request. This

203 # limit is published in the discovery document for use by clients.

204 # Note: You must separately configure the upstream web server or

205 # proxy to actually enforce the desired maximum request size on the

206 # server side.

207 MaxRequestSize: 134217728

208

209 # Limit the number of bytes read from the database during an index

210 # request (by retrieving and returning fewer rows than would

211 # normally be returned in a single response).

212 # Note 1: This setting never reduces the number of returned rows to

213 # zero, no matter how big the first data row is.

214 # Note 2: Currently, this is only checked against a specific set of

215 # columns that tend to get large (collections.manifest_text,

216 # containers.mounts, workflows.definition). Other fields (e.g.,

217 # "properties" hashes) are not counted against this limit.

218 MaxIndexDatabaseRead: 134217728

219

220 # Maximum number of items to return when responding to APIs that

221 # can return partial result sets using limit and offset parameters

222 # (e.g., *.index, groups.contents). If a request specifies a "limit"

223 # parameter higher than this value, this value is used instead.

224 MaxItemsPerResponse: 1000

225

226 # Maximum number of requests to process concurrently

227 # in a single service process, or 0 for no limit.

228 MaxConcurrentRequests: 64

229

230 # Maximum number of incoming requests to hold in a priority

231 # queue waiting for one of the MaxConcurrentRequests slots to be

232 # free. When the queue is longer than this, respond 503 to the

233 # lowest priority request.

234 #

235 # If MaxQueuedRequests is 0, respond 503 immediately to

236 # additional requests while at the MaxConcurrentRequests limit.

237 MaxQueuedRequests: 64

238

239 # Maximum time a "lock container" request is allowed to wait in

240 # the incoming request queue before returning 503.

241 MaxQueueTimeForLockRequests: 2s

242

243 # Fraction of MaxConcurrentRequests that can be "log create"

244 # messages at any given time. This is to prevent logging

245 # updates from crowding out more important requests.

246 LogCreateRequestFraction: 0.50

247

248 # Maximum number of 64MiB memory buffers per Keepstore server process, or

249 # 0 for no limit. When this limit is reached, up to

250 # (MaxConcurrentRequests - MaxKeepBlobBuffers) HTTP requests requiring

251 # buffers (like GET and PUT) will wait for buffer space to be released.

252 # Any HTTP requests beyond MaxConcurrentRequests will receive an

253 # immediate 503 response.

254 #

255 # MaxKeepBlobBuffers should be set such that (MaxKeepBlobBuffers * 64MiB

256 # * 1.1) fits comfortably in memory. On a host dedicated to running

257 # Keepstore, divide total memory by 88MiB to suggest a suitable value.

258 # For example, if grep MemTotal /proc/meminfo reports MemTotal: 7125440

259 # kB, compute 7125440 / (88 * 1024)=79 and set MaxKeepBlobBuffers: 79

260 MaxKeepBlobBuffers: 128

261

262 # API methods to disable. Disabled methods are not listed in the

263 # discovery document, and respond 404 to all requests.

264 # Example: {"jobs.create":{}, "pipeline_instances.create": {}}

265 DisabledAPIs: {}

266

267 # Interval (seconds) between asynchronous permission view updates. Any

268 # permission-updating API called with the 'async' parameter schedules an

269 # update on the permission view in the future, if not already scheduled.

270 AsyncPermissionsUpdateInterval: 20s

271

272 # Maximum number of concurrent outgoing requests to make while

273 # serving a single incoming multi-cluster (federated) request.

274 MaxRequestAmplification: 4

275

276 # Maximum wall clock time to spend handling an incoming request.

277 RequestTimeout: 5m

278

279 # Websocket will send a periodic empty event after 'SendTimeout'

280 # if there is no other activity to maintain the connection /

281 # detect dropped connections.

282 SendTimeout: 60s

283

284 WebsocketClientEventQueue: 64

285 WebsocketServerEventQueue: 4

286

287 # Timeout on requests to internal Keep services.

288 KeepServiceRequestTimeout: 15s

289

290 # Vocabulary file path, local to the node running the controller.

291 # This JSON file should contain the description of what's allowed

292 # as object's metadata. Its format is described at:

293 # https://doc.arvados.org/admin/metadata-vocabulary.html

294 VocabularyPath: ""

295

296 # If true, a project must have a non-empty description field in

297 # order to be frozen.

298 FreezeProjectRequiresDescription: false

299

300 # Project properties that must have non-empty values in order to

301 # freeze a project. Example: "property_name": {}

302 FreezeProjectRequiresProperties:

303 SAMPLE: {}

304

305 # If true, only an admin user can un-freeze a project. If false,

306 # any user with "manage" permission can un-freeze.

307 UnfreezeProjectRequiresAdmin: false

308

309 # (Experimental) Use row-level locking on update API calls.

310 LockBeforeUpdate: false

311

312 Users:

313 # Config parameters to automatically setup new users. If enabled,

314 # new users will be able to self-activate. Enable this if you want

315 # to run an open instance where anyone can create an account and use

316 # the system without requiring manual approval.

317 #

318 # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.

319 # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.

320 AutoSetupNewUsers: false

321 AutoSetupNewUsersWithVmUUID: ""

322 AutoSetupNewUsersWithRepository: false

323 AutoSetupUsernameBlacklist:

324 arvados: {}

325 git: {}

326 gitolite: {}

327 gitolite-admin: {}

328 root: {}

329 syslog: {}

330 SAMPLE: {}

331

332 # When NewUsersAreActive is set to true, new users will be active

333 # immediately. This skips the "self-activate" step which enforces

334 # user agreements. Should only be enabled for development.

335 NewUsersAreActive: false

336

337 # Newly activated users (whether set up by an admin or via

338 # AutoSetupNewUsers) immediately become visible to other active

339 # users.

340 #

341 # On a multi-tenant cluster, where the intent is for users to be

342 # invisible to one another unless they have been added to the

343 # same group(s) via Workbench admin interface, change this to

344 # false.

345 ActivatedUsersAreVisibleToOthers: true

346

347 # The e-mail address of the user you would like to be marked as an admin

348 # user on their first login.

349 AutoAdminUserWithEmail: ""

350

351 # If AutoAdminFirstUser is set to true, the first user to log in when no

352 # other admin users exist will automatically become an admin user.

353 AutoAdminFirstUser: false

354

355 # Email address to notify whenever a user creates a profile for the

356 # first time

357 UserProfileNotificationAddress: ""

358 AdminNotifierEmailFrom: arvados@example.com

359 EmailSubjectPrefix: "[ARVADOS] "

360 UserNotifierEmailFrom: arvados@example.com

361 UserNotifierEmailBcc: {}

362 NewUserNotificationRecipients: {}

363 NewInactiveUserNotificationRecipients: {}

364

365 # Set AnonymousUserToken to enable anonymous user access. Populate this

366 # field with a random string at least 50 characters long.

367 AnonymousUserToken: ""

368

369 # If a new user has an alternate email address (local@domain)

370 # with the domain given here, its local part becomes the new

371 # user's default username. Otherwise, the user's primary email

372 # address is used.

373 PreferDomainForUsername: ""

374

375 UserSetupMailText: |

376 <% if not @user.full_name.empty? -%>

377 <%= @user.full_name %>,

378 <% else -%>

379 Hi there,

380 <% end -%>

381

382 Your Arvados account has been set up. You can log in at

383

384 <%= Rails.configuration.Services.Workbench1.ExternalURL %>

385

386 Thanks,

387 Your Arvados administrator.

388

389 # If RoleGroupsVisibleToAll is true, all role groups are visible

390 # to all active users.

391 #

392 # If false, users must be granted permission to role groups in

393 # order to see them. This is more appropriate for a multi-tenant

394 # cluster.

395 RoleGroupsVisibleToAll: true

396

397 # If CanCreateRoleGroups is true, regular (non-admin) users can

398 # create new role groups.

399 #

400 # If false, only admins can create new role groups.

401 CanCreateRoleGroups: true

402

403 # During each period, a log entry with event_type="activity"

404 # will be recorded for each user who is active during that

405 # period. The object_uuid attribute will indicate the user's

406 # UUID.

407 #

408 # Multiple log entries for the same user may be generated during

409 # a period if there are multiple controller processes or a

410 # controller process is restarted.

411 #

412 # Use 0 to disable activity logging.

413 ActivityLoggingPeriod: 24h

414

415 AuditLogs:

416 # Time to keep audit logs. (An audit log is a row added

417 # to the "logs" table in the PostgreSQL database each time an

418 # Arvados object is created, modified, or deleted.)

419 #

420 # Currently, websocket event notifications rely on audit logs, so

421 # this should not be set lower than 5m (5 minutes).

422 MaxAge: 336h

423

424 # Maximum number of log rows to delete in a single SQL transaction.

425 #

426 # If MaxDeleteBatch is 0, log entries will never be

427 # deleted by Arvados. Cleanup can be done by an external process

428 # without affecting any Arvados system processes, as long as very

429 # recent (<5 minutes old) logs are not deleted.

430 #

431 # 100000 is a reasonable batch size for most sites.

432 MaxDeleteBatch: 0

433

434 # Attributes to suppress in events and audit logs. Notably,

435 # specifying {"manifest_text": {}} here typically makes the database

436 # smaller and faster.

437 #

438 # Warning: Using any non-empty value here can have undesirable side

439 # effects for any client or component that relies on event logs.

440 # Use at your own risk.

441 UnloggedAttributes: {}

442

443 SystemLogs:

444

445 # Logging threshold: panic, fatal, error, warn, info, debug, or

446 # trace

447 LogLevel: info

448

449 # Logging format: json or text

450 Format: json

451

452 # Maximum characters of (JSON-encoded) query parameters to include

453 # in each request log entry. When params exceed this size, they will

454 # be JSON-encoded, truncated to this size, and logged as

455 # params_truncated.

456 MaxRequestLogParamsSize: 2000

457

458 # In all services except RailsAPI, periodically check whether

459 # the incoming HTTP request queue is nearly full (see

460 # MaxConcurrentRequests) and, if so, write a snapshot of the

461 # request queue to {service}-requests.json in the specified

462 # directory.

463 #

464 # Leave blank to disable.

465 RequestQueueDumpDirectory: ""

466

467 Collections:

468

469 # Enable access controls for data stored in Keep. This should

470 # always be set to true on a production cluster.

471 BlobSigning: true

472

473 # BlobSigningKey is a string of alphanumeric characters used to

474 # generate permission signatures for Keep locators. It must be

475 # identical to the permission key given to Keep. IMPORTANT: This

476 # is a site secret. It should be at least 50 characters.

477 #

478 # Modifying BlobSigningKey will invalidate all existing

479 # signatures, which can cause programs to fail (e.g., arv-put,

480 # arv-get, and Crunch jobs). To avoid errors, rotate keys only

481 # when no such processes are running.

482 BlobSigningKey: ""

483

484 # Enable garbage collection of unreferenced blobs in Keep.

485 BlobTrash: true

486

487 # Time to leave unreferenced blobs in "trashed" state before

488 # deleting them, or 0 to skip the "trashed" state entirely and

489 # delete unreferenced blobs.

490 #

491 # If you use any Amazon S3 buckets as storage volumes, this

492 # must be at least 24h to avoid occasional data loss.

493 BlobTrashLifetime: 336h

494

495 # How often to check for (and delete) trashed blocks whose

496 # BlobTrashLifetime has expired.

497 BlobTrashCheckInterval: 24h

498

499 # Maximum number of concurrent "trash blob" and "delete trashed

500 # blob" operations conducted by a single keepstore process. Each

501 # of these can be set to 0 to disable the respective operation.

502 #

503 # If BlobTrashLifetime is zero, "trash" and "delete trash"

504 # happen at once, so only the lower of these two values is used.

505 BlobTrashConcurrency: 4

506 BlobDeleteConcurrency: 4

507

508 # Maximum number of concurrent "create additional replica of

509 # existing blob" operations conducted by a single keepstore

510 # process.

511 BlobReplicateConcurrency: 4

512

513 # Default replication level for collections. This is used when a

514 # collection's replication_desired attribute is nil.

515 DefaultReplication: 2

516

517 # BlobSigningTTL determines the minimum lifetime of transient

518 # data, i.e., blocks that are not referenced by

519 # collections. Unreferenced blocks exist for two reasons:

520 #

521 # 1) A data block must be written to a disk/cloud backend device

522 # before a collection can be created/updated with a reference to

523 # it.

524 #

525 # 2) Deleting or updating a collection can remove the last

526 # remaining reference to a data block.

527 #

528 # If BlobSigningTTL is too short, long-running

529 # processes/containers will fail when they take too long (a)

530 # between writing blocks and writing collections that reference

531 # them, or (b) between reading collections and reading the

532 # referenced blocks.

533 #

534 # If BlobSigningTTL is too long, data will still be stored long

535 # after the referring collections are deleted, and you will

536 # needlessly fill up disks or waste money on cloud storage.

537 #

538 # Modifying BlobSigningTTL invalidates existing signatures; see

539 # BlobSigningKey note above.

540 #

541 # The default is 2 weeks.

542 BlobSigningTTL: 336h

543

544 # When running keep-balance, this is the destination filename for

545 # the list of lost block hashes if there are any, one per line.

546 # Updated automatically during each successful run.

547 BlobMissingReport: ""

548

549 # keep-balance operates periodically, i.e.: do a

550 # scan/balance operation, sleep, repeat.

551 #

552 # BalancePeriod determines the interval between start times of

553 # successive scan/balance operations. If a scan/balance operation

554 # takes longer than BalancePeriod, the next one will follow it

555 # immediately.

556 #

557 # If SIGUSR1 is received during an idle period between operations,

558 # the next operation will start immediately.

559 BalancePeriod: 6h

560

561 # Limits the number of collections retrieved by keep-balance per

562 # API transaction. If this is zero, page size is

563 # determined by the API server's own page size limits (see

564 # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).

565 BalanceCollectionBatch: 0

566

567 # The size of keep-balance's internal queue of

568 # collections. Higher values may improve throughput by allowing

569 # keep-balance to fetch collections from the database while the

570 # current collections are still being processed, at the expense of

571 # using more memory. If this is zero or omitted, pages are

572 # processed serially.

573 BalanceCollectionBuffers: 4

574

575 # Maximum time for a rebalancing run. This ensures keep-balance

576 # eventually gives up and retries if, for example, a network

577 # error causes a hung connection that is never closed by the

578 # OS. It should be long enough that it doesn't interrupt a

579 # long-running balancing operation.

580 BalanceTimeout: 6h

581

582 # Maximum number of replication_confirmed /

583 # storage_classes_confirmed updates to write to the database

584 # after a rebalancing run. When many updates are needed, this

585 # spreads them over a few runs rather than applying them all at

586 # once.

587 BalanceUpdateLimit: 100000

588

589 # Default lifetime for ephemeral collections: 2 weeks. This must not

590 # be less than BlobSigningTTL.

591 DefaultTrashLifetime: 336h

592

593 # Interval (seconds) between trash sweeps. During a trash sweep,

594 # collections are marked as trash if their trash_at time has

595 # arrived, and deleted if their delete_at time has arrived.

596 TrashSweepInterval: 60s

597

598 # If true, enable collection versioning.

599 # When a collection's preserve_version field is true or the current version

600 # is older than the amount of seconds defined on PreserveVersionIfIdle,

601 # a snapshot of the collection's previous state is created and linked to

602 # the current collection.

603 CollectionVersioning: true

604

605 # 0s = auto-create a new version on every update.

606 # -1s = never auto-create new versions.

607 # > 0s = auto-create a new version when older than the specified number of seconds.

608 PreserveVersionIfIdle: 10s

609

610 # If non-empty, allow project and collection names to contain

611 # the "/" character (slash/stroke/solidus), and replace "/" with

612 # the given string in the filesystem hierarchy presented by

613 # WebDAV. Example values are "%2f" and "{slash}". Names that

614 # contain the substitution string itself may result in confusing

615 # behavior, so a value like "_" is not recommended.

616 #

617 # If the default empty value is used, the server will reject

618 # requests to create or rename a collection when the new name

619 # contains "/".

620 #

621 # If the value "/" is used, project and collection names

622 # containing "/" will be allowed, but they will not be

623 # accessible via WebDAV.

624 #

625 # Use of this feature is not recommended, if it can be avoided.

626 ForwardSlashNameSubstitution: ""

627

628 # Include "folder objects" in S3 ListObjects responses.

629 S3FolderObjects: true

630

631 # Managed collection properties. At creation time, if the client didn't

632 # provide the listed keys, they will be automatically populated following

633 # one of the following behaviors:

634 #

635 # * UUID of the user who owns the containing project.

636 # responsible_person_uuid: {Function: original_owner, Protected: true}

637 #

638 # * Default concrete value.

639 # foo_bar: {Value: baz, Protected: false}

640 #

641 # If Protected is true, only an admin user can modify its value.

642 ManagedProperties:

643 SAMPLE: {Function: original_owner, Protected: true}

644

645 # In "trust all content" mode, Workbench will redirect download

646 # requests to WebDAV preview link, even in the cases when

647 # WebDAV would have to expose XSS vulnerabilities in order to

648 # handle the redirect (see discussion on Services.WebDAV).

649 #

650 # This setting has no effect in the recommended configuration, where the

651 # WebDAV service is configured to have a separate domain for every

652 # collection and XSS protection is provided by browsers' same-origin

653 # policy.

654 #

655 # The default setting (false) is appropriate for a multi-user site.

656 TrustAllContent: false

657

658 # Cache parameters for WebDAV content serving:

659 WebDAVCache:

660 # Time to cache manifests, permission checks, and sessions.

661 TTL: 300s

662

663 # Block cache entries. Each block consumes up to 64 MiB RAM.

664 MaxBlockEntries: 20

665

666 # Approximate memory limit (in bytes) for session cache.

667 #

668 # Note this applies to the in-memory representation of

669 # projects and collections -- metadata, block locators,

670 # filenames, etc. -- excluding cached file content, which is

671 # limited by MaxBlockEntries.

672 MaxCollectionBytes: 100000000

673

674 # Persistent sessions.

675 MaxSessions: 100

676

677 # Selectively set permissions for regular users and admins to

678 # download or upload data files using the upload/download

679 # features for Workbench, WebDAV and S3 API support.

680 WebDAVPermission:

681 User:

682 Download: true

683 Upload: true

684 Admin:

685 Download: true

686 Upload: true

687

688 # Selectively set permissions for regular users and admins to be

689 # able to download or upload blocks using arv-put and

690 # arv-get from outside the cluster.

691 KeepproxyPermission:

692 User:

693 Download: true

694 Upload: true

695 Admin:

696 Download: true

697 Upload: true

698

699 # Post upload / download events to the API server logs table, so

700 # that they can be included in the arv-user-activity report.

701 # You can disable this if you find that it is creating excess

702 # load on the API server and you don't need it.

703 WebDAVLogEvents: true

704

705 Login:

706 # One of the following mechanisms (Google, PAM, LDAP, or

707 # LoginCluster) should be enabled; see

708 # https://doc.arvados.org/install/setup-login.html

709

710 Google:

711 # Authenticate with Google.

712 Enable: false

713

714 # Use the Google Cloud console to enable the People API (APIs

715 # and Services > Enable APIs and services > Google People API

716 # > Enable), generate a Client ID and secret (APIs and

717 # Services > Credentials > Create credentials > OAuth client

718 # ID > Web application) and add your controller's /login URL

719 # (e.g., "https://zzzzz.example.com/login") as an authorized

720 # redirect URL.

721 ClientID: ""

722 ClientSecret: ""

723

724 # Allow users to log in to existing accounts using any verified

725 # email address listed by their Google account. If true, the

726 # Google People API must be enabled in order for Google login to

727 # work. If false, only the primary email address will be used.

728 AlternateEmailAddresses: true

729

730 # Send additional parameters with authentication requests. See

731 # https://developers.google.com/identity/protocols/oauth2/openid-connect#authenticationuriparameters

732 # for a list of supported parameters.

733 AuthenticationRequestParameters:

734 # Show the "choose which Google account" page, even if the

735 # client is currently logged in to exactly one Google

736 # account.

737 prompt: select_account

738

739 SAMPLE: ""

740

741 OpenIDConnect:

742 # Authenticate with an OpenID Connect provider.

743 Enable: false

744

745 # Issuer URL, e.g., "https://login.example.com".

746 #

747 # This must be exactly equal to the URL returned by the issuer

748 # itself in its config response ("issuer" key). If the

749 # configured value is "https://example" and the provider

750 # returns "https://example:443" or "https://example/" then

751 # login will fail, even though those URLs are equivalent

752 # (RFC3986).

753 Issuer: ""

754

755 # Your client ID and client secret (supplied by the provider).

756 ClientID: ""

757 ClientSecret: ""

758

759 # OpenID claim field containing the user's email

760 # address. Normally "email"; see

761 # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims

762 EmailClaim: "email"

763

764 # OpenID claim field containing the email verification

765 # flag. Normally "email_verified". To accept every returned

766 # email address without checking a "verified" field at all,

767 # use the empty string "".

768 EmailVerifiedClaim: "email_verified"

769

770 # OpenID claim field containing the user's preferred

771 # username. If empty, use the mailbox part of the user's email

772 # address.

773 UsernameClaim: ""

774

775 # Send additional parameters with authentication requests,

776 # like {display: page, prompt: consent}. See

777 # https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest

778 # and refer to your provider's documentation for supported

779 # parameters.

780 AuthenticationRequestParameters:

781 SAMPLE: ""

782

783 # Accept an OIDC access token as an API token if the OIDC

784 # provider's UserInfo endpoint accepts it.

785 #

786 # AcceptAccessTokenScope should also be used when enabling

787 # this feature.

788 AcceptAccessToken: false

789

790 # Before accepting an OIDC access token as an API token, first

791 # check that it is a JWT whose "scope" value includes this

792 # value. Example: "https://zzzzz.example.com/" (your Arvados

793 # API endpoint).

794 #

795 # If this value is empty and AcceptAccessToken is true, all

796 # access tokens will be accepted regardless of scope,

797 # including non-JWT tokens. This is not recommended.

798 AcceptAccessTokenScope: ""

799

800 PAM:

801 # Use PAM to authenticate users.

802 Enable: false

803

804 # PAM service name. PAM will apply the policy in the

805 # corresponding config file (e.g., /etc/pam.d/arvados) or, if

806 # there is none, the default "other" config.

807 Service: arvados

808

809 # Domain name (e.g., "example.com") to use to construct the

810 # user's email address if PAM authentication returns a

811 # username with no "@". If empty, use the PAM username as the

812 # user's email address, whether or not it contains "@".

813 #

814 # Note that the email address is used as the primary key for

815 # user records when logging in. Therefore, if you change

816 # PAMDefaultEmailDomain after the initial installation, you

817 # should also update existing user records to reflect the new

818 # domain. Otherwise, next time those users log in, they will

819 # be given new accounts instead of accessing their existing

820 # accounts.

821 DefaultEmailDomain: ""

822

823 LDAP:

824 # Use an LDAP service to authenticate users.

825 Enable: false

826

827 # Server URL, like "ldap://ldapserver.example.com:389" or

828 # "ldaps://ldapserver.example.com:636".

829 URL: "ldap://ldap:389"

830

831 # Use StartTLS upon connecting to the server.

832 StartTLS: true

833

834 # Skip TLS certificate name verification.

835 InsecureTLS: false

836

837 # Minimum TLS version to negotiate when connecting to server

838 # (ldaps://... or StartTLS). It may be necessary to set this

839 # to "1.1" for compatibility with older LDAP servers that fail

840 # with 'LDAP Result Code 200 "Network Error": TLS handshake

841 # failed (tls: server selected unsupported protocol version

842 # 301)'.

843 #

844 # If blank, use the recommended minimum version (1.2).

845 MinTLSVersion: ""

846

847 # Strip the @domain part if a user supplies an email-style

848 # username with this domain. If "*", strip any user-provided

849 # domain. If "", never strip the domain part. Example:

850 # "example.com"

851 StripDomain: ""

852

853 # If, after applying StripDomain, the username contains no "@"

854 # character, append this domain to form an email-style

855 # username. Example: "example.com"

856 AppendDomain: ""

857

858 # The LDAP attribute to filter on when looking up a username

859 # (after applying StripDomain and AppendDomain).

860 SearchAttribute: uid

861

862 # Bind with this username (DN or UPN) and password when

863 # looking up the user record.

864 #

865 # Example user: "cn=admin,dc=example,dc=com"

866 SearchBindUser: ""

867 SearchBindPassword: ""

868

869 # Directory base for username lookup. Example:

870 # "ou=Users,dc=example,dc=com"

871 SearchBase: ""

872

873 # Additional filters to apply when looking up users' LDAP

874 # entries. This can be used to restrict access to a subset of

875 # LDAP users, or to disambiguate users from other directory

876 # entries that have the SearchAttribute present.

877 #

878 # Special characters in assertion values must be escaped (see

879 # RFC4515).

880 #

881 # Example: "(objectClass=person)"

882 SearchFilters: ""

883

884 # LDAP attribute to use as the user's email address.

885 #

886 # Important: This must not be an attribute whose value can be

887 # edited in the directory by the users themselves. Otherwise,

888 # users can take over other users' Arvados accounts trivially

889 # (email address is the primary key for Arvados accounts.)

890 EmailAttribute: mail

891

892 # LDAP attribute to use as the preferred Arvados username. If

893 # no value is found (or this config is empty) the username

894 # originally supplied by the user will be used.

895 UsernameAttribute: uid

896

897 Test:

898 # Authenticate users listed here in the config file. This

899 # feature is intended to be used in test environments, and

900 # should not be used in production.

901 Enable: false

902 Users:

903 SAMPLE:

904 Email: alice@example.com

905 Password: xyzzy

906

907 # The ID of the cluster to delegate the user database to. When set,

908 # logins on this cluster will be redirected to the login cluster

909 # (login cluster must appear in RemoteClusters with Proxy: true)

910 LoginCluster: ""

911

912 # How long a cached token belonging to a remote cluster will

913 # remain valid before it needs to be revalidated.

914 RemoteTokenRefresh: 5m

915

916 # How long a client token created from a login flow will be valid without

917 # asking the user to re-login. Example values: 60m, 8h.

918 # Default value zero means tokens don't have expiration.

919 TokenLifetime: 0s

920

921 # If true (default) tokens issued through login are allowed to create

922 # new tokens.

923 # If false, tokens issued through login are not allowed to

924 # view/create other tokens. New tokens can only be created

925 # by going through login again.

926 IssueTrustedTokens: true

927

928 # Origins (scheme://host[:port]) of clients trusted to receive

929 # new tokens via login process. The ExternalURLs of the local

930 # Workbench1 and Workbench2 are trusted implicitly and do not

931 # need to be listed here. If this is a LoginCluster, you

932 # probably want to include the other Workbench instances in the

933 # federation in this list.

934 #

935 # A wildcard like "https://*.example" will match client URLs

936 # like "https://a.example" and "https://a.b.c.example".

937 #

938 # Example:

939 #

940 # TrustedClients:

941 # "https://workbench.other-cluster.example": {}

942 # "https://workbench2.other-cluster.example": {}

943 TrustedClients:

944 SAMPLE: {}

945

946 # Treat any origin whose host part is "localhost" or a private

947 # IP address (e.g., http://10.0.0.123:3000/) as if it were

948 # listed in TrustedClients.

949 #

950 # Intended only for test/development use. Not appropriate for

951 # production use.

952 TrustPrivateNetworks: false

953

954 Git:

955 # Path to git or gitolite-shell executable. Each authenticated

956 # request will execute this program with the single argument "http-backend"

957 GitCommand: /usr/bin/git

958

959 # Path to Gitolite's home directory. If a non-empty path is given,

960 # the CGI environment will be set up to support the use of

961 # gitolite-shell as a GitCommand: for example, if GitoliteHome is

962 # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,

963 # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.

964 GitoliteHome: ""

965

966 # Git repositories must be readable by api server, or you won't be

967 # able to submit crunch jobs. To pass the test suites, put a clone

968 # of the arvados tree in {git_repositories_dir}/arvados.git or

969 # {git_repositories_dir}/arvados/.git

970 Repositories: /var/lib/arvados/git/repositories

971

972 TLS:

973 # Use "file:///var/lib/acme/live/example.com/cert" and

974 # ".../privkey" to load externally managed certificates.

975 Certificate: ""

976 Key: ""

977

978 # Accept invalid certificates when connecting to servers. Never

979 # use this in production.

980 Insecure: false

981

982 ACME:

983 # Obtain certificates automatically for ExternalURL domains

984 # using an ACME server and http-01 validation.

985 #

986 # To use Let's Encrypt, specify "LE". To use the Let's

987 # Encrypt staging environment, specify "LE-staging". To use a

988 # different ACME server, specify the full directory URL

989 # ("https://...").

990 #

991 # Note: this feature is not yet implemented in released

992 # versions, only in the alpha/prerelease arvados-server-easy

993 # package.

994 #

995 # Implies agreement with the server's terms of service.

996 Server: ""

997

998 Containers:

999 # List of supported Docker Registry image formats that compute nodes

1000 # are able to use. `arv keep docker` will error out if a user tries

1001 # to store an image with an unsupported format. Use an empty map

1002 # to skip the compatibility check (and display a warning message to

1003 # that effect).

1004 #

1005 # Example for sites running docker < 1.10: {"v1": {}}

1006 # Example for sites running docker >= 1.10: {"v2": {}}

1007 # Example for disabling check: {}

1008 SupportedDockerImageFormats:

1009 "v2": {}

1010 SAMPLE: {}

1011

1012 # Include details about job reuse decisions in the server log. This

1013 # causes additional database queries to run, so it should not be

1014 # enabled unless you expect to examine the resulting logs for

1015 # troubleshooting purposes.

1016 LogReuseDecisions: false

1017

1018 # Default value for keep_cache_ram of a container's

1019 # runtime_constraints. Note: this gets added to the RAM request

1020 # used to allocate a VM or submit an HPC job.

1021 #

1022 # If this is zero, container requests that don't specify RAM or

1023 # disk cache size will use a disk cache, sized to the

1024 # container's RAM requirement (but with minimum 2 GiB and

1025 # maximum 32 GiB).

1026 #

1027 # Note: If you change this value, containers that used the previous

1028 # default value will only be reused by container requests that

1029 # explicitly specify the previous value in their keep_cache_ram

1030 # runtime constraint.

1031 DefaultKeepCacheRAM: 0

1032

1033 # Number of times a container can be unlocked before being

1034 # automatically cancelled.

1035 MaxDispatchAttempts: 5

1036

1037 # Default value for container_count_max for container requests. This is the

1038 # number of times Arvados will create a new container to satisfy a container

1039 # request. If a container is cancelled it will retry a new container if

1040 # container_count < container_count_max on any container requests associated

1041 # with the cancelled container.

1042 MaxRetryAttempts: 3

1043

1044 # Schedule all child containers on preemptible instances (e.g. AWS

1045 # Spot Instances) even if not requested by the submitter.

1046 #

1047 # If false, containers are scheduled on preemptible instances

1048 # only when requested by the submitter.

1049 #

1050 # This flag is ignored if no preemptible instance types are

1051 # configured, and has no effect on top-level containers.

1052 AlwaysUsePreemptibleInstances: false

1053

1054 # Automatically add a preemptible variant for every

1055 # non-preemptible entry in InstanceTypes below. The maximum bid

1056 # price for the preemptible variant will be the non-preemptible

1057 # price multiplied by PreemptiblePriceFactor. If 0, preemptible

1058 # variants are not added automatically.

1059 #

1060 # A price factor of 1.0 is a reasonable starting point.

1061 PreemptiblePriceFactor: 0

1062

1063 # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the

1064 # cloud dispatcher for executing containers on worker VMs.

1065 # Begins with "-----BEGIN RSA PRIVATE KEY-----\n"

1066 # and ends with "\n-----END RSA PRIVATE KEY-----\n".

1067 #

1068 # Use "file:///absolute/path/to/key" to load the key from a

1069 # separate file instead of embedding it in the configuration

1070 # file.

1071 DispatchPrivateKey: ""

1072

1073 # Maximum time to wait for workers to come up before abandoning

1074 # stale locks from a previous dispatch process.

1075 StaleLockTimeout: 1m

1076

1077 # The crunch-run command used to start a container on a worker node.

1078 #

1079 # When dispatching to cloud VMs, this is used only if

1080 # DeployRunnerBinary in the CloudVMs section is set to the empty

1081 # string.

1082 CrunchRunCommand: "crunch-run"

1083

1084 # Extra arguments to add to crunch-run invocation

1085 # Example: ["--cgroup-parent-subsystem=memory"]

1086 CrunchRunArgumentsList: []

1087

1088 # Extra RAM to reserve on the node, in addition to

1089 # the amount specified in the container's RuntimeConstraints

1090 ReserveExtraRAM: 550MiB

1091

1092 # Minimum time between two attempts to run the same container

1093 MinRetryPeriod: 0s

1094

1095 # Container runtime: "docker" (default) or "singularity"

1096 RuntimeEngine: docker

1097

1098 # When running a container, run a dedicated keepstore process,

1099 # using the specified number of 64 MiB memory buffers per

1100 # allocated CPU core (VCPUs in the container's runtime

1101 # constraints). The dedicated keepstore handles I/O for

1102 # collections mounted in the container, as well as saving

1103 # container logs.

1104 #

1105 # A zero value disables this feature.

1106 #

1107 # In order for this feature to be activated, no volume may use

1108 # AccessViaHosts, and no writable volume may have Replication

1109 # lower than Collections.DefaultReplication. If these

1110 # requirements are not satisfied, the feature is disabled

1111 # automatically regardless of the value given here.

1112 #

1113 # When an HPC dispatcher is in use (see SLURM and LSF sections),

1114 # this feature depends on the operator to ensure an up-to-date

1115 # cluster configuration file (/etc/arvados/config.yml) is

1116 # available on all compute nodes. If it is missing or not

1117 # readable by the crunch-run user, the feature will be disabled

1118 # automatically. To read it from a different location, add a

1119 # "-config=/path/to/config.yml" argument to

1120 # CrunchRunArgumentsList above.

1121 #

1122 # When the cloud dispatcher is in use (see CloudVMs section) and

1123 # this configuration is enabled, the entire cluster

1124 # configuration file, including the system root token, is copied

1125 # to the worker node and held in memory for the duration of the

1126 # container.

1127 LocalKeepBlobBuffersPerVCPU: 1

1128

1129 # When running a dedicated keepstore process for a container

1130 # (see LocalKeepBlobBuffersPerVCPU), write keepstore log

1131 # messages to keepstore.txt in the container's log collection.

1132 #

1133 # These log messages can reveal some volume configuration

1134 # details, error messages from the cloud storage provider, etc.,

1135 # which are not otherwise visible to users.

1136 #

1137 # Accepted values:

1138 # * "none" -- no keepstore.txt file

1139 # * "all" -- all logs, including request and response lines

1140 # * "errors" -- all logs except "response" logs with 2xx

1141 # response codes and "request" logs

1142 LocalKeepLogsToContainerLog: none

1143

1144 Logging:

1145 # Periodically (see SweepInterval) Arvados will check for

1146 # containers that have been finished for at least this long,

1147 # and delete their stdout, stderr, arv-mount, crunch-run, and

1148 # crunchstat logs from the logs table.

1149 MaxAge: 720h

1150

1151 # How often to delete cached log entries for finished

1152 # containers (see MaxAge).

1153 SweepInterval: 12h

1154

1155 # These two settings control how frequently log events are flushed to the

1156 # database. Log lines are buffered until either LogBytesPerEvent

1157 # has been reached or LogSecondsBetweenEvents has elapsed since

1158 # the last flush.

1159 LogBytesPerEvent: 4096

1160 LogSecondsBetweenEvents: 5s

1161

1162 # The sample period for throttling logs.

1163 LogThrottlePeriod: 60s

1164

1165 # Maximum number of bytes that a job can log over LogThrottlePeriod

1166 # before being silenced until the end of the period.

1167 LogThrottleBytes: 65536

1168

1169 # Maximum number of lines that a job can log over LogThrottlePeriod

1170 # before being silenced until the end of the period.

1171 LogThrottleLines: 1024

1172

1173 # Maximum bytes that may be logged by a single job. Log bytes that are

1174 # silenced by throttling are not counted against this total.

1175 # If you set this to zero, each container will only create a single

1176 # log on the API server, noting for users that logging is throttled.

1177 LimitLogBytesPerJob: 67108864

1178

1179 LogPartialLineThrottlePeriod: 5s

1180

1181 # Container logs are written to Keep and saved in a

1182 # collection, which is updated periodically while the

1183 # container runs. This value sets the interval between

1184 # collection updates.

1185 LogUpdatePeriod: 30m

1186

1187 # The log collection is also updated when the specified amount of

1188 # log data (given in bytes) is produced in less than one update

1189 # period.

1190 LogUpdateSize: 32MiB

1191

1192 ShellAccess:

1193 # An admin user can use "arvados-client shell" to start an

1194 # interactive shell (with any user ID) in any running

1195 # container.

1196 Admin: false

1197

1198 # Any user can use "arvados-client shell" to start an

1199 # interactive shell (with any user ID) in any running

1200 # container that they started, provided it isn't also

1201 # associated with a different user's container request.

1202 #

1203 # Interactive sessions make it easy to alter the container's

1204 # runtime environment in ways that aren't recorded or

1205 # reproducible. Consider the implications for automatic

1206 # container reuse before enabling and using this feature. In

1207 # particular, note that starting an interactive session does

1208 # not disqualify a container from being reused by a different

1209 # user/workflow in the future.

1210 User: false

1211

1212 SLURM:

1213 PrioritySpread: 0

1214 SbatchArgumentsList: []

1215 SbatchEnvironmentVariables:

1216 SAMPLE: ""

1217 Managed:

1218 # Path to dns server configuration directory

1219 # (e.g. /etc/unbound.d/conf.d). If false, do not write any config

1220 # files or touch restart.txt (see below).

1221 DNSServerConfDir: ""

1222

1223 # Template file for the dns server host snippets. See

1224 # unbound.template in this directory for an example. If false, do

1225 # not write any config files.

1226 DNSServerConfTemplate: ""

1227

1228 # String to write to {dns_server_conf_dir}/restart.txt (with a

1229 # trailing newline) after updating local data. If false, do not

1230 # open or write the restart.txt file.

1231 DNSServerReloadCommand: ""

1232

1233 # Command to run after each DNS update. Template variables will be

1234 # substituted; see the "unbound" example below. If false, do not run

1235 # a command.

1236 DNSServerUpdateCommand: ""

1237

1238 ComputeNodeDomain: ""

1239 ComputeNodeNameservers:

1240 "192.168.1.1": {}

1241 SAMPLE: {}

1242

1243 # Hostname to assign to a compute node when it sends a "ping" and the

1244 # hostname in its Node record is nil.

1245 # During bootstrapping, the "ping" script is expected to notice the

1246 # hostname given in the ping response, and update its unix hostname

1247 # accordingly.

1248 # If false, leave the hostname alone (this is appropriate if your compute

1249 # nodes' hostnames are already assigned by some other mechanism).

1250 #

1251 # One way or another, the hostnames of your node records should agree

1252 # with your DNS records and your /etc/slurm-llnl/slurm.conf files.

1253 #

1254 # Example for compute0000, compute0001, ....:

1255 # assign_node_hostname: compute%<slot_number>04d

1256 # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)

1257 AssignNodeHostname: "compute%<slot_number>d"

1258

1259 LSF:

1260 # Arguments to bsub when submitting Arvados containers as LSF jobs.

1261 #

1262 # Template variables starting with % will be substituted as follows:

1263 #

1264 # %U uuid

1265 # %C number of VCPUs

1266 # %M memory in MB

1267 # %T tmp in MB

1268 # %G number of GPU devices (runtime_constraints.cuda.device_count)

1269 #

1270 # Use %% to express a literal %. The %%J in the default will be changed

1271 # to %J, which is interpreted by bsub itself.

1272 #

1273 # Note that the default arguments cause LSF to write two files

1274 # in /tmp on the compute node each time an Arvados container

1275 # runs. Ensure you have something in place to delete old files

1276 # from /tmp, or adjust the "-o" and "-e" arguments accordingly.

1277 BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]

1278

1279 # Arguments that will be appended to the bsub command line

1280 # when submitting Arvados containers as LSF jobs with

1281 # runtime_constraints.cuda.device_count > 0

1282 BsubCUDAArguments: ["-gpu", "num=%G"]

1283

1284 # Use sudo to switch to this user account when submitting LSF

1285 # jobs.

1286 #

1287 # This account must exist on the hosts where LSF jobs run

1288 # ("execution hosts"), as well as on the host where the

1289 # Arvados LSF dispatcher runs ("submission host").

1290 BsubSudoUser: "crunch"

1291

1292 JobsAPI:

1293 # Enable the legacy 'jobs' API (crunch v1). This value must be a string.

1294 #

1295 # Note: this only enables read-only access, creating new

1296 # legacy jobs and pipelines is not supported.

1297 #

1298 # 'auto' -- (default) enable the Jobs API only if it has been used before

1299 # (i.e., there are job records in the database)

1300 # 'true' -- enable the Jobs API despite lack of existing records.

1301 # 'false' -- disable the Jobs API despite presence of existing records.

1302 Enable: 'auto'

1303

1304 # Git repositories must be readable by api server, or you won't be

1305 # able to submit crunch jobs. To pass the test suites, put a clone

1306 # of the arvados tree in {git_repositories_dir}/arvados.git or

1307 # {git_repositories_dir}/arvados/.git

1308 GitInternalDir: /var/lib/arvados/internal.git

1309

1310 CloudVMs:

1311 # Enable the cloud scheduler.

1312 Enable: false

1313

1314 # Name/number of port where workers' SSH services listen.

1315 SSHPort: "22"

1316

1317 # Interval between queue polls.

1318 PollInterval: 10s

1319

1320 # Shell command to execute on each worker to determine whether

1321 # the worker is booted and ready to run containers. It should

1322 # exit zero if the worker is ready.

1323 BootProbeCommand: "systemctl is-system-running"

1324

1325 # Minimum interval between consecutive probes to a single

1326 # worker.

1327 ProbeInterval: 10s

1328

1329 # Maximum probes per second, across all workers in a pool.

1330 MaxProbesPerSecond: 10

1331

1332 # Time before repeating SIGTERM when killing a container.

1333 TimeoutSignal: 5s

1334

1335 # Time to give up on a process (most likely arv-mount) that

1336 # still holds a container lockfile after its main supervisor

1337 # process has exited, and declare the instance broken.

1338 TimeoutStaleRunLock: 5s

1339

1340 # Time to give up on SIGTERM and write off the worker.

1341 TimeoutTERM: 2m

1342

1343 # Maximum create/destroy-instance operations per second (0 =

1344 # unlimited).

1345 MaxCloudOpsPerSecond: 10

1346

1347 # Maximum concurrent instance creation operations (0 = unlimited).

1348 #

1349 # MaxConcurrentInstanceCreateOps limits the number of instance creation

1350 # requests that can be in flight at any one time, whereas

1351 # MaxCloudOpsPerSecond limits the number of create/destroy operations

1352 # that can be started per second.

1353 #

1354 # Because the API for instance creation on Azure is synchronous, it is

1355 # recommended to increase MaxConcurrentInstanceCreateOps when running

1356 # on Azure. When using managed images, a value of 20 would be

1357 # appropriate. When using Azure Shared Image Galleries, it could be set

1358 # higher. For more information, see

1359 # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image

1360 #

1361 # MaxConcurrentInstanceCreateOps can be increased for other cloud

1362 # providers too, if desired.

1363 MaxConcurrentInstanceCreateOps: 1

1364

1365 # The maximum number of instances to run at a time, or 0 for

1366 # unlimited.

1367 #

1368 # If more instances than this are already running and busy

1369 # when the dispatcher starts up, the running containers will

1370 # be allowed to finish before the excess instances are shut

1371 # down.

1372 MaxInstances: 64

1373

1374 # Maximum fraction of CloudVMs.MaxInstances allowed to run

1375 # "supervisor" containers at any given time. A supervisor is a

1376 # container whose purpose is mainly to submit and manage other

1377 # containers, such as arvados-cwl-runner workflow runner.

1378 #

1379 # If there is a hard limit on the number of concurrent

1380 # containers that the cluster can run, it is important to

1381 # avoid crowding out the containers doing useful work with

1382 # containers that just create more work.

1383 #

1384 # For example, with the default MaxInstances of 64, it will

1385 # schedule at most floor(64*0.30) = 19 concurrent workflows,

1386 # ensuring 45 slots are available for work.

1387 SupervisorFraction: 0.30

1388

1389 # Interval between cloud provider syncs/updates ("list all

1390 # instances").

1391 SyncInterval: 1m

1392

1393 # Time to leave an idle worker running (in case new containers

1394 # appear in the queue that it can run) before shutting it

1395 # down.

1396 TimeoutIdle: 1m

1397

1398 # Time to wait for a new worker to boot (i.e., pass

1399 # BootProbeCommand) before giving up and shutting it down.

1400 TimeoutBooting: 10m

1401

1402 # Maximum time a worker can stay alive with no successful

1403 # probes before being automatically shut down.

1404 TimeoutProbe: 10m

1405

1406 # Time after shutting down a worker to retry the

1407 # shutdown/destroy operation.

1408 TimeoutShutdown: 10s

1409

1410 # Worker VM image ID.

1411 # (aws) AMI identifier

1412 # (azure) managed disks: the name of the managed disk image

1413 # (azure) shared image gallery: the name of the image definition. Also

1414 # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.

1415 # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.

1416 # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd

1417 ImageID: ""

1418

1419 # Shell script to run on new instances using the cloud

1420 # provider's UserData (EC2) or CustomData (Azure) feature.

1421 #

1422 # It is not necessary to include a #!/bin/sh line.

1423 InstanceInitCommand: ""

1424

1425 # An executable file (located on the dispatcher host) to be

1426 # copied to cloud instances at runtime and used as the

1427 # container runner/supervisor. The default value is the

1428 # dispatcher program itself.

1429 #

1430 # Use the empty string to disable this step: nothing will be

1431 # copied, and cloud instances are assumed to have a suitable

1432 # version of crunch-run installed; see CrunchRunCommand above.

1433 DeployRunnerBinary: "/proc/self/exe"

1434

1435 # Install the Dispatcher's SSH public key (derived from

1436 # DispatchPrivateKey) when creating new cloud

1437 # instances. Change this to false if you are using a different

1438 # mechanism to pre-install the public key on new instances.

1439 DeployPublicKey: true

1440

1441 # Tags to add on all resources (VMs, NICs, disks) created by

1442 # the container dispatcher. (Arvados's own tags --

1443 # InstanceType, IdleBehavior, and InstanceSecret -- will also

1444 # be added.)

1445 ResourceTags:

1446 SAMPLE: "tag value"

1447

1448 # Prefix for predefined tags used by Arvados (InstanceSetID,

1449 # InstanceType, InstanceSecret, IdleBehavior). With the

1450 # default value "Arvados", tags are "ArvadosInstanceSetID",

1451 # "ArvadosInstanceSecret", etc.

1452 #

1453 # This should only be changed while no cloud resources are in

1454 # use and the cloud dispatcher is not running. Otherwise,

1455 # VMs/resources that were added using the old tag prefix will

1456 # need to be detected and cleaned up manually.

1457 TagKeyPrefix: Arvados

1458

1459 # Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS),

1460 # or "loopback" (run containers on dispatch host for testing

1461 # purposes).

1462 Driver: ec2

1463

1464 # Cloud-specific driver parameters.

1465 DriverParameters:

1466

1467 # (ec2) Credentials. Omit or leave blank if using IAM role.

1468 AccessKeyID: ""

1469 SecretAccessKey: ""

1470

1471 # (ec2) Instance configuration.

1472 SecurityGroupIDs:

1473 "SAMPLE": {}

1474 SubnetID: ""

1475 Region: ""

1476 EBSVolumeType: gp2

1477 AdminUsername: debian

1478 # (ec2) name of the IAMInstanceProfile for instances started by

1479 # the cloud dispatcher. Leave blank when not needed.

1480 IAMInstanceProfile: ""

1481

1482 # (ec2) how often to look up spot instance pricing data

1483 # (only while running spot instances) for the purpose of

1484 # calculating container cost estimates. A value of 0

1485 # disables spot price lookups entirely.

1486 SpotPriceUpdateInterval: 24h

1487

1488 # (ec2) per-GiB-month cost of EBS volumes. Matches

1489 # EBSVolumeType. Used to account for AddedScratch when

1490 # calculating container cost estimates. Note that

1491 # https://aws.amazon.com/ebs/pricing/ defines GB to mean

1492 # GiB, so an advertised price $0.10/GB indicates a real

1493 # price of $0.10/GiB and can be entered here as 0.10.

1494 EBSPrice: 0.10

1495

1496 # (azure) Credentials.

1497 SubscriptionID: ""

1498 ClientID: ""

1499 ClientSecret: ""

1500 TenantID: ""

1501

1502 # (azure) Instance configuration.

1503 CloudEnvironment: AzurePublicCloud

1504 Location: centralus

1505

1506 # (azure) The resource group where the VM and virtual NIC will be

1507 # created.

1508 ResourceGroup: ""

1509

1510 # (azure) The resource group of the Network to use for the virtual

1511 # NIC (if different from ResourceGroup)

1512 NetworkResourceGroup: ""

1513 Network: ""

1514 Subnet: ""

1515

1516 # (azure) managed disks: The resource group where the managed disk

1517 # image can be found (if different from ResourceGroup).

1518 ImageResourceGroup: ""

1519

1520 # (azure) shared image gallery: the name of the gallery

1521 SharedImageGalleryName: ""

1522 # (azure) shared image gallery: the version of the image definition

1523 SharedImageGalleryImageVersion: ""

1524

1525 # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs

1526 StorageAccount: ""

1527 BlobContainer: ""

1528

1529 # (azure) How long to wait before deleting VHD and NIC

1530 # objects that are no longer being used.

1531 DeleteDanglingResourcesAfter: 20s

1532

1533 # Account (that already exists in the VM image) that will be

1534 # set up with an ssh authorized key to allow the compute

1535 # dispatcher to connect.

1536 AdminUsername: arvados

1537

1538 InstanceTypes:

1539

1540 # Use the instance type name as the key (in place of "SAMPLE" in

1541 # this sample entry).

1542 SAMPLE:

1543 # Cloud provider's instance type. Defaults to the configured type name.

1544 ProviderType: ""

1545 VCPUs: 1

1546 RAM: 128MiB

1547 IncludedScratch: 16GB

1548 AddedScratch: 0

1549 # Hourly price ($), used to select node types for containers,

1550 # and to calculate estimated container costs. For spot

1551 # instances on EC2, this is also used as the maximum price

1552 # when launching spot instances, while the estimated container

1553 # cost is computed based on the current spot price according

1554 # to AWS. On Azure, and on-demand instances on EC2, the price

1555 # given here is used to compute container cost estimates.

1556 Price: 0.1

1557 Preemptible: false

1558 # Include this section if the node type includes GPU (CUDA) support

1559 CUDA:

1560 DriverVersion: "11.0"

1561 HardwareCapability: "9.0"

1562 DeviceCount: 1

1563

1564 StorageClasses:

1565

1566 # If you use multiple storage classes, specify them here, using

1567 # the storage class name as the key (in place of "SAMPLE" in

1568 # this sample entry).

1569 #

1570 # Further info/examples:

1571 # https://doc.arvados.org/admin/storage-classes.html

1572 SAMPLE:

1573

1574 # Priority determines the order volumes should be searched

1575 # when reading data, in cases where a keepstore server has

1576 # access to multiple volumes with different storage classes.

1577 Priority: 0

1578

1579 # Default determines which storage class(es) should be used

1580 # when a user/client writes data or saves a new collection

1581 # without specifying storage classes.

1582 #

1583 # If any StorageClasses are configured, at least one of them

1584 # must have Default: true.

1585 Default: true

1586

1587 Volumes:

1588 SAMPLE:

1589 # AccessViaHosts specifies which keepstore processes can read

1590 # and write data on the volume.

1591 #

1592 # For a local filesystem, AccessViaHosts has one entry,

1593 # indicating which server the filesystem is located on.

1594 #

1595 # For a network-attached backend accessible by all keepstore

1596 # servers, like a cloud storage bucket or an NFS mount,

1597 # AccessViaHosts can be empty/omitted.

1598 #

1599 # Further info/examples:

1600 # https://doc.arvados.org/install/configure-fs-storage.html

1601 # https://doc.arvados.org/install/configure-s3-object-storage.html

1602 # https://doc.arvados.org/install/configure-azure-blob-storage.html

1603 AccessViaHosts:

1604 SAMPLE:

1605 ReadOnly: false

1606 "http://host1.example:25107": {}

1607 ReadOnly: false

1608 Replication: 1

1609 StorageClasses:

1610 # If you have configured storage classes (see StorageClasses

1611 # section above), add an entry here for each storage class

1612 # satisfied by this volume.

1613 SAMPLE: true

1614 Driver: S3

1615 DriverParameters:

1616 # for s3 driver -- see

1617 # https://doc.arvados.org/install/configure-s3-object-storage.html

1618 IAMRole: aaaaa

1619 AccessKeyID: aaaaa

1620 SecretAccessKey: aaaaa

1621 Endpoint: ""

1622 Region: us-east-1

1623 Bucket: aaaaa

1624 LocationConstraint: false

1625 V2Signature: false

1626 IndexPageSize: 1000

1627 ConnectTimeout: 1m

1628 ReadTimeout: 10m

1629 RaceWindow: 24h

1630 PrefixLength: 0

1631

1632 # For S3 driver, potentially unsafe tuning parameter,

1633 # intentionally excluded from main documentation.

1634 #

1635 # Enable deletion (garbage collection) even when the

1636 # configured BlobTrashLifetime is zero. WARNING: eventual

1637 # consistency may result in race conditions that can cause

1638 # data loss. Do not enable this unless you understand and

1639 # accept the risk.

1640 UnsafeDelete: false

1641

1642 # for azure driver -- see

1643 # https://doc.arvados.org/install/configure-azure-blob-storage.html

1644 StorageAccountName: aaaaa

1645 StorageAccountKey: aaaaa

1646 StorageBaseURL: core.windows.net

1647 ContainerName: aaaaa

1648 RequestTimeout: 30s

1649 ListBlobsRetryDelay: 10s

1650 ListBlobsMaxAttempts: 10

1651 MaxGetBytes: 0

1652 WriteRaceInterval: 15s

1653 WriteRacePollTime: 1s

1654

1655 # for local directory driver -- see

1656 # https://doc.arvados.org/install/configure-fs-storage.html

1657 Root: /var/lib/arvados/keep-data

1658

1659 # For local directory driver, potentially confusing tuning

1660 # parameter, intentionally excluded from main documentation.

1661 #

1662 # When true, read and write operations (for whole 64MiB

1663 # blocks) on an individual volume will be queued and issued

1664 # serially. When false, read and write operations will be

1665 # issued concurrently.

1666 #

1667 # May possibly improve throughput if you have physical spinning disks

1668 # and experience contention when there are multiple requests

1669 # to the same volume.

1670 #

1671 # Otherwise, when using SSDs, RAID, or a shared network filesystem, you

1672 # should leave this alone.

1673 Serialize: false

1674

1675 Mail:

1676 MailchimpAPIKey: ""

1677 MailchimpListID: ""

1678 SendUserSetupNotificationEmail: true

1679

1680 # Bug/issue report notification to and from addresses

1681 IssueReporterEmailFrom: "arvados@example.com"

1682 IssueReporterEmailTo: "arvados@example.com"

1683 SupportEmailAddress: "arvados@example.com"

1684

1685 # Generic issue email from

1686 EmailFrom: "arvados@example.com"

1687 RemoteClusters:

1688 "*":

1689 Host: ""

1690 Proxy: false

1691 Scheme: https

1692 Insecure: false

1693 ActivateUsers: false

1694 SAMPLE:

1695 # API endpoint host or host:port; default is {id}.arvadosapi.com

1696 Host: sample.arvadosapi.com

1697

1698 # Perform a proxy request when a local client requests an

1699 # object belonging to this remote.

1700 Proxy: false

1701

1702 # Default "https". Can be set to "http" for testing.

1703 Scheme: https

1704

1705 # Disable TLS verify. Can be set to true for testing.

1706 Insecure: false

1707

1708 # When users present tokens issued by this remote cluster, and

1709 # their accounts are active on the remote cluster, activate

1710 # them on this cluster too.

1711 ActivateUsers: false

1712

1713 Workbench:

1714 # Workbench1 configs

1715 Theme: default

1716 ActivationContactLink: mailto:info@arvados.org

1717 ArvadosDocsite: https://doc.arvados.org

1718 ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public

1719 ShowUserAgreementInline: false

1720 SecretKeyBase: ""

1721

1722 # Set this configuration to true to avoid providing an easy way for users

1723 # to share data with unauthenticated users; this may be necessary on

1724 # installations where strict data access controls are needed.

1725 DisableSharingURLsUI: false

1726

1727 # Scratch directory used by the remote repository browsing

1728 # feature. If it doesn't exist, it (and any missing parents) will be

1729 # created using mkdir_p.

1730 RepositoryCache: /var/www/arvados-workbench/current/tmp/git

1731

1732 # Below is a sample setting of the UserProfileFormFields config parameter.

1733 # This configuration parameter should be set to either false (to disable) or

1734 # to a map as shown below.

1735 # Configure the map of input fields to be displayed in the profile page

1736 # using the attribute "key" for each of the input fields.

1737 # This sample shows a configuration with one required and one optional form field.

1738 # For each of these input fields:

1739 # You can specify "Type" as "text" or "select".

1740 # List the "Options" to be displayed for each "select" menu.

1741 # Set "Required" as "true" for any of these fields to make them required.

1742 # If any of the required fields are missing in the user's profile, the user will be

1743 # redirected to the profile page before they can access any Workbench features.

1744 UserProfileFormFields:

1745 SAMPLE:

1746 Type: select

1747 FormFieldTitle: Best color

1748 FormFieldDescription: your favorite color

1749 Required: false

1750 Position: 1

1751 Options:

1752 red: {}

1753 blue: {}

1754 green: {}

1755 SAMPLE: {}

1756

1757 # exampleTextValue: # key that will be set in properties

1758 # Type: text #

1759 # FormFieldTitle: ""

1760 # FormFieldDescription: ""

1761 # Required: true

1762 # Position: 1

1763 # exampleOptionsValue:

1764 # Type: select

1765 # FormFieldTitle: ""

1766 # FormFieldDescription: ""

1767 # Required: true

1768 # Position: 1

1769 # Options:

1770 # red: {}

1771 # blue: {}

1772 # yellow: {}

1773

1774 # Use "UserProfileFormMessage" to configure the message you want

1775 # to display on the profile page.

1776 UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'

1777

1778 # Mimetypes of applications for which the view icon

1779 # would be enabled in a collection's show page.

1780 # It is sufficient to list only applications here.

1781 # No need to list text and image types.

1782 ApplicationMimetypesWithViewIcon:

1783 cwl: {}

1784 fasta: {}

1785 go: {}

1786 javascript: {}

1787 json: {}

1788 pdf: {}

1789 python: {}

1790 x-python: {}

1791 r: {}

1792 rtf: {}

1793 sam: {}

1794 x-sh: {}

1795 vnd.realvnc.bed: {}

1796 xml: {}

1797 xsl: {}

1798 SAMPLE: {}

1799

1800 # The maximum number of bytes to load in the log viewer

1801 LogViewerMaxBytes: 1M

1802

1803 # When anonymous_user_token is configured, show public projects page

1804 EnablePublicProjectsPage: true

1805

1806 # By default, disable the "Getting Started" popup which is specific to Arvados playground

1807 EnableGettingStartedPopup: false

1808

1809 # Ask Arvados API server to compress its response payloads.

1810 APIResponseCompression: true

1811

1812 # Timeouts for API requests.

1813 APIClientConnectTimeout: 2m

1814 APIClientReceiveTimeout: 5m

1815

1816 # Maximum number of historic log records of a running job to fetch

1817 # and display in the Log tab, while subscribing to web sockets.

1818 RunningJobLogRecordsToFetch: 2000

1819

1820 # In systems with many shared projects, loading of dashboard and topnav

1821 # can be slow due to collections indexing; use the following parameters

1822 # to suppress these properties

1823 ShowRecentCollectionsOnDashboard: true

1824 ShowUserNotifications: true

1825

1826 # Enable/disable "multi-site search" in top nav ("true"/"false"), or

1827 # a link to the multi-site search page on a "home" Workbench site.

1828 #

1829 # Example:

1830 # https://workbench.zzzzz.arvadosapi.com/collections/multisite

1831 MultiSiteSearch: ""

1832

1833 # Should workbench allow management of local git repositories? Set to false if

1834 # the jobs api is disabled and there are no local git repositories.

1835 Repositories: true

1836

1837 SiteName: Arvados Workbench

1838 ProfilingEnabled: false

1839

1840 # This is related to obsolete Google OpenID 1.0 login

1841 # but some workbench stuff still expects it to be set.

1842 DefaultOpenIdPrefix: "https://www.google.com/accounts/o8/id"

1843

1844 # Workbench2 configs

1845 FileViewersConfigURL: ""

1846

1847 # Idle time after which the user's session will be auto closed.

1848 # This feature is disabled when set to zero.

1849 IdleTimeout: 0s

1850

1851 # UUID of a collection. This collection should be shared with

1852 # all users. Workbench will look for a file "banner.html" in

1853 # this collection and display its contents (should be

1854 # HTML-formatted text) when users first log in to Workbench.

1855 BannerUUID: ""

1856

1857 # Workbench welcome screen, this is HTML text that will be

1858 # incorporated directly onto the page.

1859 WelcomePageHTML: |

1860 <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />

1861 <h2>Please log in.</h2>

1862

1863 <p>If you have never used Arvados Workbench before, logging in

1864 for the first time will automatically create a new

1865 account.</p>

1866

1867 <i>Arvados Workbench uses your information only for

1868 identification, and does not retrieve any other personal

1869 information.</i>

1870

1871 # Workbench screen displayed to inactive users. This is HTML

1872 # text that will be incorporated directly onto the page.

1873 InactivePageHTML: |

1874 <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />

1875 <h3>Hi! You're logged in, but...</h3>

1876 <p>Your account is inactive.</p>

1877 <p>An administrator must activate your account before you can get

1878 any further.</p>

1879

1880 # Connecting to Arvados shell VMs tends to be site-specific.

1881 # Put any special instructions here. This is HTML text that will

1882 # be incorporated directly onto the Workbench page.

1883 SSHHelpPageHTML: |

1884 <a href="https://doc.arvados.org/user/getting_started/ssh-access-unix.html">Accessing an Arvados VM with SSH</a> (generic instructions).

1885 Site configurations vary. Contact your local cluster administrator if you have difficulty accessing an Arvados shell node.

1886

1887 # Sample text if you are using a "switchyard" ssh proxy.

1888 # Replace "zzzzz" with your Cluster ID.

1889 #SSHHelpPageHTML: |

1890 # <p>Add a section like this to your SSH configuration file ( <i>~/.ssh/config</i>):</p>

1891 # <pre>Host *.zzzzz

1892 # TCPKeepAlive yes

1893 # ServerAliveInterval 60

1894 # ProxyCommand ssh -p2222 turnout@switchyard.zzzzz.arvadosapi.com -x -a $SSH_PROXY_FLAGS %h

1895 # </pre>

1896

1897 # If you are using a switchyard ssh proxy, shell node hostnames

1898 # may require a special hostname suffix. In the sample ssh

1899 # configuration above, this would be ".zzzzz"

1900 # This is added to the hostname in the "command line" column

1901 # of the Workbench "shell VMs" page.

1902 #

1903 # If your shell nodes are directly accessible by users without a

1904 # proxy and have fully qualified host names, you should leave

1905 # this blank.

1906 SSHHelpHostSuffix: ""

1907

1908 # (Experimental) Restart services automatically when config file

1909 # changes are detected. Only supported by `arvados-server boot` in

1910 # dev/test mode.

1911 AutoReloadConfig: false