X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/bd1aa20c5878436505b31aa987473ac3fbb6395c..18b6c49d69b8264273150cd29b2bf0b57c54e2a8:/doc/install/configure-s3-object-storage.html.textile.liquid diff --git a/doc/install/configure-s3-object-storage.html.textile.liquid b/doc/install/configure-s3-object-storage.html.textile.liquid index 58c05b11d4..76a2f3ab57 100644 --- a/doc/install/configure-s3-object-storage.html.textile.liquid +++ b/doc/install/configure-s3-object-storage.html.textile.liquid @@ -9,96 +9,97 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -As an alternative to local and network-attached POSIX filesystems, Keepstore can store data in object storage compatible with the S3 API, such as Amazon S3, Google Cloud Storage, or Ceph RADOS. - -h2. Configure keepstore - -Copy the "access key" and "secret key" to files where they will be accessible to keepstore at startup time. - - -
~$ sudo sh -c 'cat >/etc/arvados/keepstore/aws_s3_access_key.txt <<EOF'
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==
-EOF
-~$ sudo chmod 0400 /etc/arvados/keepstore/aws_s3_access_key.txt
-
-
- -Next, edit the @Volumes@ section of the @keepstore.yml@ config file: - -
-Volumes:
-- # The volume type, this indicates object storage compatible with the S3 API
-  Type: S3
-
-  # Storage provider (blank uses Amazon S3 by default)
-  Endpoint: ""
-
-  # The bucket to use for the backing store.
-  Bucket: example-bucket-name
-
-  # The region where the bucket is located.
-  Region: us-east-1
-
-  # The credentials to use to access the bucket.
-  AccessKeyFile: /etc/arvados/keepstore/aws_s3_access_key.txt
-  SecretKeyFile: /etc/arvados/keepstore/aws_s3_secret_key.txt
-
-  # Maximum time to wait making the initial connection to the backend before
-  # failing the request.
-  ConnectTimeout: 1m0s
-
-  # Page size for s3 "list bucket contents" requests
-  IndexPageSize: 1000
-
-  # True if the region requires a LocationConstraint declaration
-  LocationConstraint: false
-
-  # Maximum eventual consistency latency
-  RaceWindow: 24h0m0s
-
-  # If true, do not accept write or trash operations, only reads.
-  ReadOnly: false
-
-  # Maximum time to wait for a complete response from the backend before
-  # failing the request.
-  ReadTimeout: 5m0s
-
-  # How much replication is performed by the underlying bucket.
-  # This is used to inform replication decisions at the Keep layer.
-  S3Replication: 2
-
-  # Storage classes to associate with this volume.  See "Configuring
-  # storage classes" in the "Admin" section of doc.arvados.org.
-  StorageClasses: null
-
-  # Enable deletion (garbage collection) even when TrashLifetime is
-  # zero.  WARNING: eventual consistency may result in race conditions
-  # that can cause data loss.  Do not enable this unless you know what
-  # you are doing.
-  UnsafeDelete: false
-
-- # Example configuration using alternate storage provider
-  # Configuration for Google cloud storage
-  Endpoint: https://storage.googleapis.com
-  Region: ""
-
-  AccessKeyFile: /etc/arvados/keepstore/gce_s3_access_key.txt
-  SecretKeyFile: /etc/arvados/keepstore/gce_s3_secret_key.txt
-  Bucket: example-bucket-name
-  ConnectTimeout: 1m0s
-  IndexPageSize: 1000
-  LocationConstraint: false
-  RaceWindow: 24h0m0s
-  ReadOnly: false
-  ReadTimeout: 5m0s
-  S3Replication: 2
-  StorageClasses: null
-  UnsafeDelete: false
-
- -Start (or restart) keepstore, and check its log file to confirm it is using the new configuration. - - -
2015/10/26 21:06:24 Using volume azure-storage-container:"exampleContainerName" (writable=true)
-
-
+Keepstore can store data in object storage compatible with the S3 API, such as Amazon S3, Google Cloud Storage, or Ceph RADOS. + +Volumes are configured in the @Volumes@ section of the cluster configuration file. + +{% include 'assign_volume_uuid' %} + +
    Volumes:
+      ClusterID-nyw5e-000000000000000:
+        AccessViaHosts:
+          # This section determines which keepstore servers access the
+          # volume. In this example, keep0 has read/write access, and
+          # keep1 has read-only access.
+          #
+          # If the AccessViaHosts section is empty or omitted, all
+          # keepstore servers will have read/write access to the
+          # volume.
+          "http://keep0.ClusterID.example.com:25107": {}
+          "http://keep1.ClusterID.example.com:25107": {ReadOnly: true}
+
+        Driver: S3
+        DriverParameters:
+          # Bucket name.
+          Bucket: example-bucket-name
+
+          # IAM role name to use when retrieving credentials from
+          # instance metadata. It can be omitted, in which case the
+          # role name itself will be retrieved from instance metadata
+          # -- but setting it explicitly may protect you from using
+          # the wrong credentials in the event of an
+          # installation/configuration error.
+          IAMRole: ""
+
+          # If you are not using an IAM role for authentication,
+          # specify access credentials here instead.
+          AccessKey: ""
+          SecretKey: ""
+
+          # Storage provider region. For Google Cloud Storage, use ""
+          # or omit.
+          Region: us-east-1
+
+          # Storage provider endpoint. For Amazon S3, use "" or
+          # omit. For Google Cloud Storage, use
+          # "https://storage.googleapis.com".
+          Endpoint: ""
+
+          # Change to true if the region requires a LocationConstraint
+          # declaration.
+          LocationConstraint: false
+
+          # Use V2 signatures instead of the default V4. Amazon S3
+          # supports V4 signatures in all regions, but this option
+          # might be needed for other S3-compatible services.
+          V2Signature: false
+
+          # Use the AWS S3 v2 Go driver instead of the goamz driver.
+          UseAWSS3v2Driver: false
+
+          # Requested page size for "list bucket contents" requests.
+          IndexPageSize: 1000
+
+          # Maximum time to wait while making the initial connection
+          # to the backend before failing the request.
+          ConnectTimeout: 1m
+
+          # Maximum time to wait for a complete response from the
+          # backend before failing the request.
+          ReadTimeout: 2m
+
+          # Maximum eventual consistency latency
+          RaceWindow: 24h
+
+        # How much replication is provided by the underlying bucket.
+        # This is used to inform replication decisions at the Keep
+        # layer.
+        Replication: 2
+
+        # If true, do not accept write or trash operations, even if
+        # AccessViaHosts.*.ReadOnly is false.
+        #
+        # If false or omitted, enable write access (subject to
+        # AccessViaHosts.*.ReadOnly, where applicable).
+        ReadOnly: false
+
+        # Storage classes to associate with this volume.  See "Storage
+        # classes" in the "Admin" section of doc.arvados.org.
+        StorageClasses: null
+
+ +Two S3 drivers are available. Historically, Arvados has used the @goamz@ driver to talk to S3-compatible services. More recently, support for the @aws-sdk-go-v2@ driver was added. This driver can be activated by setting the @UseAWSS3v2Driver@ flag to @true@. + +The @aws-sdk-go-v2@ driver does not support the old S3 v2 signing algorithm. This will not affect interacting with AWS S3, but it might be an issue when Keep is backed by a very old version of a third-party S3-compatible service. + +The @aws-sdk-go-v2@ driver can improve read performance by 50-100% over the @goamz@ driver, but it has not had as much production use. See the "wiki":https://dev.arvados.org/projects/arvados/wiki/Keep_real_world_performance_numbers for details.