15781: Adds test proving that 'contains' does case-sensitive matching.

[arvados.git] / lib / config / generated_config.go
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go

index 35edb05bcd683a1b07596d39e39ae441c1b7aa86..2ee6025077c64be9dff306b46e4a44acabc69394 100644 (file)
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -31,8 +31,15 @@ Clusters:
        # listening, and reachable from other hosts in the cluster.
        SAMPLE:
          InternalURLs:
-          "http://example.host:12345": {}
-          SAMPLE: {}
+          "http://host1.example:12345": {}
+          "http://host2.example:12345":
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
+          SAMPLE:
+            Rendezvous: ""
          ExternalURL: "-"
  
        RailsAPI:
@@ -182,6 +189,15 @@ Clusters:
        # parameter higher than this value, this value is used instead.
        MaxItemsPerResponse: 1000
  
+      # Maximum number of concurrent requests to accept in a single
+      # service process, or 0 for no limit. Currently supported only
+      # by keepstore.
+      MaxConcurrentRequests: 0
+
+      # Maximum number of 64MiB memory buffers per keepstore server
+      # process, or 0 for no limit.
+      MaxKeepBlobBuffers: 128
+
        # API methods to disable. Disabled methods are not listed in the
        # discovery document, and respond 404 to all requests.
        # Example: {"jobs.create":{}, "pipeline_instances.create": {}}
@@ -212,14 +228,17 @@ Clusters:
        WebsocketClientEventQueue: 64
        WebsocketServerEventQueue: 4
  
+      # Timeout on requests to internal Keep services.
+      KeepServiceRequestTimeout: 15s
+
      Users:
        # Config parameters to automatically setup new users.  If enabled,
        # these users will be able to self-activate.  Enable this if you want
        # to run an open instance where anyone can create an account and use
        # the system without requiring manual approval.
        #
-      # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-      # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+      # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+      # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
        AutoSetupNewUsers: false
        AutoSetupNewUsersWithVmUUID: ""
        AutoSetupNewUsersWithRepository: false
@@ -232,7 +251,7 @@ Clusters:
          syslog: {}
          SAMPLE: {}
  
-      # When new_users_are_active is set to true, new users will be active
+      # When NewUsersAreActive is set to true, new users will be active
        # immediately.  This skips the "self-activate" step which enforces
        # user agreements.  Should only be enabled for development.
        NewUsersAreActive: false
@@ -244,7 +263,7 @@ Clusters:
        # should be an address associated with a Google account.
        AutoAdminUserWithEmail: ""
  
-      # If auto_admin_first_user is set to true, the first user to log in when no
+      # If AutoAdminFirstUser is set to true, the first user to log in when no
        # other admin users exist will automatically become an admin user.
        AutoAdminFirstUser: false
  
@@ -257,11 +276,17 @@ Clusters:
        NewUserNotificationRecipients: {}
        NewInactiveUserNotificationRecipients: {}
  
-      # Set anonymous_user_token to enable anonymous user access. You can get
+      # Set AnonymousUserToken to enable anonymous user access. You can get
        # the token by running "bundle exec ./script/get_anonymous_user_token.rb"
        # in the directory where your API server is running.
        AnonymousUserToken: ""
  
+      # If a new user has an alternate email address (local@domain)
+      # with the domain given here, its local part becomes the new
+      # user's default username. Otherwise, the user's primary email
+      # address is used.
+      PreferDomainForUsername: ""
+
      AuditLogs:
        # Time to keep audit logs, in seconds. (An audit log is a row added
        # to the "logs" table in the PostgreSQL database each time an
@@ -273,7 +298,7 @@ Clusters:
  
        # Maximum number of log rows to delete in a single SQL transaction.
        #
-      # If max_audit_log_delete_batch is 0, log entries will never be
+      # If MaxDeleteBatch is 0, log entries will never be
        # deleted by Arvados. Cleanup can be done by an external process
        # without affecting any Arvados system processes, as long as very
        # recent (<5 minutes old) logs are not deleted.
@@ -306,52 +331,114 @@ Clusters:
        MaxRequestLogParamsSize: 2000
  
      Collections:
-      # Allow clients to create collections by providing a manifest with
-      # unsigned data blob locators. IMPORTANT: This effectively disables
-      # access controls for data stored in Keep: a client who knows a hash
-      # can write a manifest that references the hash, pass it to
-      # collections.create (which will create a permission link), use
-      # collections.get to obtain a signature for that data locator, and
-      # use that signed locator to retrieve the data from Keep. Therefore,
-      # do not turn this on if your users expect to keep data private from
-      # one another!
+
+      # Enable access controls for data stored in Keep. This should
+      # always be set to true on a production cluster.
        BlobSigning: true
  
        # BlobSigningKey is a string of alphanumeric characters used to
        # generate permission signatures for Keep locators. It must be
-      # identical to the permission key given to Keep. IMPORTANT: This is
-      # a site secret. It should be at least 50 characters.
+      # identical to the permission key given to Keep. IMPORTANT: This
+      # is a site secret. It should be at least 50 characters.
        #
-      # Modifying blob_signing_key will invalidate all existing
+      # Modifying BlobSigningKey will invalidate all existing
        # signatures, which can cause programs to fail (e.g., arv-put,
-      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
-      # no such processes are running.
+      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only
+      # when no such processes are running.
        BlobSigningKey: ""
  
+      # Enable garbage collection of unreferenced blobs in Keep.
+      BlobTrash: true
+
+      # Time to leave unreferenced blobs in "trashed" state before
+      # deleting them, or 0 to skip the "trashed" state entirely and
+      # delete unreferenced blobs.
+      #
+      # If you use any Amazon S3 buckets as storage volumes, this
+      # must be at least 24h to avoid occasional data loss.
+      BlobTrashLifetime: 336h
+
+      # How often to check for (and delete) trashed blocks whose
+      # BlobTrashLifetime has expired.
+      BlobTrashCheckInterval: 24h
+
+      # Maximum number of concurrent "trash blob" and "delete trashed
+      # blob" operations conducted by a single keepstore process. Each
+      # of these can be set to 0 to disable the respective operation.
+      #
+      # If BlobTrashLifetime is zero, "trash" and "delete trash"
+      # happen at once, so only the lower of these two values is used.
+      BlobTrashConcurrency: 4
+      BlobDeleteConcurrency: 4
+
+      # Maximum number of concurrent "create additional replica of
+      # existing blob" operations conducted by a single keepstore
+      # process.
+      BlobReplicateConcurrency: 4
+
        # Default replication level for collections. This is used when a
        # collection's replication_desired attribute is nil.
        DefaultReplication: 2
  
-      # Lifetime (in seconds) of blob permission signatures generated by
-      # the API server. This determines how long a client can take (after
-      # retrieving a collection record) to retrieve the collection data
-      # from Keep. If the client needs more time than that (assuming the
-      # collection still has the same content and the relevant user/token
-      # still has permission) the client can retrieve the collection again
-      # to get fresh signatures.
+      # BlobSigningTTL determines the minimum lifetime of transient
+      # data, i.e., blocks that are not referenced by
+      # collections. Unreferenced blocks exist for two reasons:
+      #
+      # 1) A data block must be written to a disk/cloud backend device
+      # before a collection can be created/updated with a reference to
+      # it.
        #
-      # This must be exactly equal to the -blob-signature-ttl flag used by
-      # keepstore servers.  Otherwise, reading data blocks and saving
-      # collections will fail with HTTP 403 permission errors.
+      # 2) Deleting or updating a collection can remove the last
+      # remaining reference to a data block.
        #
-      # Modifying blob_signature_ttl invalidates existing signatures; see
-      # blob_signing_key note above.
+      # If BlobSigningTTL is too short, long-running
+      # processes/containers will fail when they take too long (a)
+      # between writing blocks and writing collections that reference
+      # them, or (b) between reading collections and reading the
+      # referenced blocks.
+      #
+      # If BlobSigningTTL is too long, data will still be stored long
+      # after the referring collections are deleted, and you will
+      # needlessly fill up disks or waste money on cloud storage.
+      #
+      # Modifying BlobSigningTTL invalidates existing signatures; see
+      # BlobSigningKey note above.
        #
        # The default is 2 weeks.
        BlobSigningTTL: 336h
  
+      # When running keep-balance, this is the destination filename for
+      # the list of lost block hashes if there are any, one per line.
+      # Updated automatically during each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 0
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
        # Default lifetime for ephemeral collections: 2 weeks. This must not
-      # be less than blob_signature_ttl.
+      # be less than BlobSigningTTL.
        DefaultTrashLifetime: 336h
  
        # Interval (seconds) between trash sweeps. During a trash sweep,
@@ -361,7 +448,7 @@ Clusters:
  
        # If true, enable collection versioning.
        # When a collection's preserve_version field is true or the current version
-      # is older than the amount of seconds defined on preserve_version_if_idle,
+      # is older than the amount of seconds defined on PreserveVersionIfIdle,
        # a snapshot of the collection's previous state is created and linked to
        # the current collection.
        CollectionVersioning: false
@@ -371,6 +458,24 @@ Clusters:
        # > 0s = auto-create a new version when older than the specified number of seconds.
        PreserveVersionIfIdle: -1s
  
+      # If non-empty, allow project and collection names to contain
+      # the "/" character (slash/stroke/solidus), and replace "/" with
+      # the given string in the filesystem hierarchy presented by
+      # WebDAV. Example values are "%2f" and "{slash}". Names that
+      # contain the substitution string itself may result in confusing
+      # behavior, so a value like "_" is not recommended.
+      #
+      # If the default empty value is used, the server will reject
+      # requests to create or rename a collection when the new name
+      # contains "/".
+      #
+      # If the value "/" is used, project and collection names
+      # containing "/" will be allowed, but they will not be
+      # accessible via WebDAV.
+      #
+      # Use of this feature is not recommended, if it can be avoided.
+      ForwardSlashNameSubstitution: ""
+
        # Managed collection properties. At creation time, if the client didn't
        # provide the listed keys, they will be automatically populated following
        # one of the following behaviors:
@@ -398,13 +503,71 @@ Clusters:
        # The default setting (false) is appropriate for a multi-user site.
        TrustAllContent: false
  
+      # Cache parameters for WebDAV content serving:
+      # * TTL: Maximum time to cache manifests and permission checks.
+      # * UUIDTTL: Maximum time to cache collection state.
+      # * MaxBlockEntries: Maximum number of block cache entries.
+      # * MaxCollectionEntries: Maximum number of collection cache entries.
+      # * MaxCollectionBytes: Approximate memory limit for collection cache.
+      # * MaxPermissionEntries: Maximum number of permission cache entries.
+      # * MaxUUIDEntries: Maximum number of UUID cache entries.
+      WebDAVCache:
+        TTL: 300s
+        UUIDTTL: 5s
+        MaxBlockEntries:      4
+        MaxCollectionEntries: 1000
+        MaxCollectionBytes:   100000000
+        MaxPermissionEntries: 1000
+        MaxUUIDEntries:       1000
+
      Login:
-      # These settings are provided by your OAuth2 provider (e.g.,
-      # sso-provider).
-      ProviderAppSecret: ""
+      # These settings are provided by your OAuth2 provider (e.g.,
+      # Google) used to perform upstream authentication.
        ProviderAppID: ""
+      ProviderAppSecret: ""
+
+      # (Experimental) Authenticate with Google, bypassing the
+      # SSO-provider gateway service. Use the Google Cloud console to
+      # enable the People API (APIs and Services > Enable APIs and
+      # services > Google People API > Enable), generate a Client ID
+      # and secret (APIs and Services > Credentials > Create
+      # credentials > OAuth client ID > Web application) and add your
+      # controller's /login URL (e.g.,
+      # "https://zzzzz.example.com/login") as an authorized redirect
+      # URL.
+      #
+      # Incompatible with ForceLegacyAPI14. ProviderAppID must be
+      # blank.
+      GoogleClientID: ""
+      GoogleClientSecret: ""
+
+      # Allow users to log in to existing accounts using any verified
+      # email address listed by their Google account. If true, the
+      # Google People API must be enabled in order for Google login to
+      # work. If false, only the primary email address will be used.
+      GoogleAlternateEmailAddresses: true
+
+      # The cluster ID to delegate the user database.  When set,
+      # logins on this cluster will be redirected to the login cluster
+      # (login cluster must appear in RemoteClusters with Proxy: true)
+      LoginCluster: ""
+
+      # How long a cached token belonging to a remote cluster will
+      # remain valid before it needs to be revalidated.
+      RemoteTokenRefresh: 5m
  
      Git:
+      # Path to git or gitolite-shell executable. Each authenticated
+      # request will execute this program with the single argument "http-backend"
+      GitCommand: /usr/bin/git
+
+      # Path to Gitolite's home directory. If a non-empty path is given,
+      # the CGI environment will be set up to support the use of
+      # gitolite-shell as a GitCommand: for example, if GitoliteHome is
+      # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
+      # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
+      GitoliteHome: ""
+
        # Git repositories must be readable by api server, or you won't be
        # able to submit crunch jobs. To pass the test suites, put a clone
        # of the arvados tree in {git_repositories_dir}/arvados.git or
@@ -498,7 +661,7 @@ Clusters:
          # has been reached or LogSecondsBetweenEvents has elapsed since
          # the last flush.
          LogBytesPerEvent: 4096
-        LogSecondsBetweenEvents: 1
+        LogSecondsBetweenEvents: 5s
  
          # The sample period for throttling logs.
          LogThrottlePeriod: 60s
@@ -531,6 +694,8 @@ Clusters:
        SLURM:
          PrioritySpread: 0
          SbatchArgumentsList: []
+        SbatchEnvironmentVariables:
+          SAMPLE: ""
          Managed:
            # Path to dns server configuration directory
            # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
@@ -574,7 +739,11 @@ Clusters:
            AssignNodeHostname: "compute%<slot_number>d"
  
        JobsAPI:
-        # Enable the legacy Jobs API.  This value must be a string.
+        # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
+        #
+        # Note: this only enables read-only access; creating new
+        # legacy jobs and pipelines is not supported.
+        #
          # 'auto' -- (default) enable the Jobs API only if it has been used before
          #         (i.e., there are job records in the database)
          # 'true' -- enable the Jobs API despite lack of existing records.
@@ -587,30 +756,6 @@ Clusters:
          # {git_repositories_dir}/arvados/.git
          GitInternalDir: /var/lib/arvados/internal.git
  
-        # Docker image to be used when none found in runtime_constraints of a job
-        DefaultDockerImage: ""
-
-        # none or slurm_immediate
-        CrunchJobWrapper: none
-
-        # username, or false = do not set uid when running jobs.
-        CrunchJobUser: crunch
-
-        # The web service must be able to create/write this file, and
-        # crunch-job must be able to stat() it.
-        CrunchRefreshTrigger: /tmp/crunch_refresh_trigger
-
-        # Control job reuse behavior when two completed jobs match the
-        # search criteria and have different outputs.
-        #
-        # If true, in case of a conflict, reuse the earliest job (this is
-        # similar to container reuse behavior).
-        #
-        # If false, in case of a conflict, do not reuse any completed job,
-        # but do reuse an already-running job if available (this is the
-        # original job reuse behavior, and is still the default).
-        ReuseJobIfOutputsDiffer: false
-
        CloudVMs:
          # Enable the cloud scheduler (experimental).
          Enable: false
@@ -667,6 +812,16 @@ Clusters:
          # Worker VM image ID.
          ImageID: ""
  
+        # An executable file (located on the dispatcher host) to be
+        # copied to cloud instances at runtime and used as the
+        # container runner/supervisor. The default value is the
+        # dispatcher program itself.
+        #
+        # Use the empty string to disable this step: nothing will be
+        # copied, and cloud instances are assumed to have a suitable
+        # version of crunch-run installed.
+        DeployRunnerBinary: "/proc/self/exe"
+
          # Tags to add on all resources (VMs, NICs, disks) created by
          # the container dispatcher. (Arvados's own tags --
          # InstanceType, IdleBehavior, and InstanceSecret -- will also
@@ -734,6 +889,90 @@ Clusters:
          Price: 0.1
          Preemptible: false
  
+    Volumes:
+      SAMPLE:
+        # AccessViaHosts specifies which keepstore processes can read
+        # and write data on the volume.
+        #
+        # For a local filesystem, AccessViaHosts has one entry,
+        # indicating which server the filesystem is located on.
+        #
+        # For a network-attached backend accessible by all keepstore
+        # servers, like a cloud storage bucket or an NFS mount,
+        # AccessViaHosts can be empty/omitted.
+        #
+        # Further info/examples:
+        # https://doc.arvados.org/install/configure-fs-storage.html
+        # https://doc.arvados.org/install/configure-s3-object-storage.html
+        # https://doc.arvados.org/install/configure-azure-blob-storage.html
+        AccessViaHosts:
+          SAMPLE:
+            ReadOnly: false
+          "http://host1.example:25107": {}
+        ReadOnly: false
+        Replication: 1
+        StorageClasses:
+          default: true
+          SAMPLE: true
+        Driver: s3
+        DriverParameters:
+          # for s3 driver -- see
+          # https://doc.arvados.org/install/configure-s3-object-storage.html
+          IAMRole: aaaaa
+          AccessKey: aaaaa
+          SecretKey: aaaaa
+          Endpoint: ""
+          Region: us-east-1a
+          Bucket: aaaaa
+          LocationConstraint: false
+          IndexPageSize: 1000
+          ConnectTimeout: 1m
+          ReadTimeout: 10m
+          RaceWindow: 24h
+
+          # For S3 driver, potentially unsafe tuning parameter,
+          # intentionally excluded from main documentation.
+          #
+          # Enable deletion (garbage collection) even when the
+          # configured BlobTrashLifetime is zero.  WARNING: eventual
+          # consistency may result in race conditions that can cause
+          # data loss.  Do not enable this unless you understand and
+          # accept the risk.
+          UnsafeDelete: false
+
+          # for azure driver -- see
+          # https://doc.arvados.org/install/configure-azure-blob-storage.html
+          StorageAccountName: aaaaa
+          StorageAccountKey: aaaaa
+          StorageBaseURL: core.windows.net
+          ContainerName: aaaaa
+          RequestTimeout: 30s
+          ListBlobsRetryDelay: 10s
+          ListBlobsMaxAttempts: 10
+          MaxGetBytes: 0
+          WriteRaceInterval: 15s
+          WriteRacePollTime: 1s
+
+          # for local directory driver -- see
+          # https://doc.arvados.org/install/configure-fs-storage.html
+          Root: /var/lib/arvados/keep-data
+
+          # For local directory driver, potentially confusing tuning
+          # parameter, intentionally excluded from main documentation.
+          #
+          # When true, read and write operations (for whole 64MiB
+          # blocks) on an individual volume will be queued and issued
+          # serially.  When false, read and write operations will be
+          # issued concurrently.
+          #
+          # May possibly improve throughput if you have physical spinning disks
+          # and experience contention when there are multiple requests
+          # to the same volume.
+          #
+          # Otherwise, when using SSDs, RAID, or a shared network filesystem, you
+          # should leave this alone.
+          Serialize: false
+
      Mail:
        MailchimpAPIKey: ""
        MailchimpListID: ""
@@ -902,6 +1141,35 @@ Clusters:
        VocabularyURL: ""
        FileViewersConfigURL: ""
  
-    # Use experimental controller code (see https://dev.arvados.org/issues/14287)
-    EnableBetaController14287: false
+      # Workbench welcome screen, this is HTML text that will be
+      # incorporated directly onto the page.
+      WelcomePageHTML: |
+        <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+        <h2>Please log in.</h2>
+
+        <p>The "Log in" button below will show you a sign-in
+        page. After you log in, you will be redirected back to
+        Arvados Workbench.</p>
+
+        <p>If you have never used Arvados Workbench before, logging in
+        for the first time will automatically create a new
+        account.</p>
+
+        <i>Arvados Workbench uses your name and email address only for
+        identification, and does not retrieve any other personal
+        information.</i>
+
+      InactivePageHTML: |
+        <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+        <h3>Hi! You're logged in, but...</h3>
+        <p>Your account is inactive.</p>
+        <p>An administrator must activate your account before you can get
+        any further.</p>
+
+    # Bypass new (Arvados 1.5) API implementations, and hand off
+    # requests directly to Rails instead. This can provide a temporary
+    # workaround for clients that are incompatible with the new API
+    # implementation. Note that it also disables some new federation
+    # features and will be removed in a future release.
+    ForceLegacyAPI14: false
  `)