1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "git.arvados.org/arvados.git/sdk/go/arvados"
14 "github.com/prometheus/client_golang/prometheus"
15 "github.com/sirupsen/logrus"
16 "golang.org/x/crypto/ssh"
// A RateLimitError should be returned by an InstanceSet when the
// cloud service indicates it is rejecting all API calls for some time
// interval.
22 type RateLimitError interface {
// Time before which the caller should expect requests to
// fail.
25 EarliestRetry() time.Time
// A QuotaError should be returned by an InstanceSet when the cloud
// service indicates the account cannot create more VMs than already
// exist.
32 type QuotaError interface {
// If true, don't create more instances until some existing
// instances are destroyed. If false, don't handle the error
// as a quota error.
// A CapacityError should be returned by an InstanceSet's Create
// method when the cloud service indicates it has insufficient
// capacity to create new instances -- i.e., we shouldn't retry right
// away.
44 type CapacityError interface {
45 // If true, wait before trying to create more instances.
46 IsCapacityError() bool
// If true, the condition is specific to the requested
// instance type. Wait before trying to create more instances
// of that same type.
50 IsInstanceTypeSpecific() bool
51 // If true, the condition affects all instance types in the
52 // same instance family. This implies
53 // IsInstanceTypeSpecific() returns false.
54 IsInstanceQuotaGroupSpecific() bool
// SharedResourceTags are the tags a driver should apply to cloud
// resources it creates that are not attached to a single VM instance.
type SharedResourceTags map[string]string

// InstanceSetID identifies an InstanceSet. A driver uses it to mark
// the long-lived cloud resources that belong to that InstanceSet.
type InstanceSetID string

// InstanceTags are the tags attached to a single VM instance.
type InstanceTags map[string]string

// InstanceID is the cloud provider's identifier for an instance.
type InstanceID string

// InstanceQuotaGroup identifies the quota group an instance type
// belongs to, as reported by an InstanceSet's InstanceQuotaGroup
// method.
type InstanceQuotaGroup string
65 // An Executor executes commands on an ExecutorTarget.
66 type Executor interface {
67 // Update the set of private keys used to authenticate to
69 SetSigners(...ssh.Signer)
71 // Set the target used for subsequent command executions.
72 SetTarget(ExecutorTarget)
74 // Return the current target.
75 Target() ExecutorTarget
77 // Execute a shell command and return the resulting stdout and
78 // stderr. stdin can be nil.
79 Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
// ErrNotImplemented is the sentinel error an ExecutorTarget's
// VerifyHostKey method returns when it has no host key verification
// mechanism to offer.
var ErrNotImplemented = errors.New("not implemented")
84 // An ExecutorTarget is a remote command execution service.
85 type ExecutorTarget interface {
86 // SSH server hostname or IP address, or empty string if
87 // unknown while instance is booting.
90 // Remote username to send during SSH authentication.
// Return nil if the given public key matches the instance's
// SSH server key. If the provided *ssh.Client is not nil,
// VerifyHostKey can use it to make outgoing network
// connections from the instance -- e.g., to use the cloud's
// "this instance's metadata" API.
//
// Return ErrNotImplemented if no verification mechanism is
// available.
101 VerifyHostKey(ssh.PublicKey, *ssh.Client) error
104 // Instance is implemented by the provider-specific instance types.
105 type Instance interface {
108 // ID returns the provider's instance ID. It must be stable
109 // for the life of the instance.
112 // String typically returns the cloud-provided instance ID.
115 // Cloud provider's "instance type" ID. Matches a ProviderType
116 // in the cluster's InstanceTypes configuration.
117 ProviderType() string
122 // Replace tags with the given tags
123 SetTags(InstanceTags) error
125 // Get recent price history, if available. The InstanceType is
126 // supplied as an argument so the driver implementation can
127 // account for AddedScratch cost without requesting the volume
128 // attachment information from the provider's API.
129 PriceHistory(arvados.InstanceType) []InstancePrice
131 // Shut down the node
135 // An InstanceSet manages a set of VM instances created by an elastic
136 // cloud provider like AWS, GCE, or Azure.
138 // All public methods of an InstanceSet, and all public methods of the
139 // instances it returns, are goroutine safe.
140 type InstanceSet interface {
141 // Create a new instance with the given type, image, and
142 // initial set of tags. If supported by the driver, add the
143 // provided public key to /root/.ssh/authorized_keys.
145 // The given InitCommand should be executed on the newly
146 // created instance. This is optional for a driver whose
147 // instances' VerifyHostKey() method never returns
148 // ErrNotImplemented. InitCommand will be under 1 KiB.
// The returned error should implement RateLimitError,
// QuotaError, and CapacityError where applicable.
152 Create(arvados.InstanceType, ImageID, InstanceTags, InitCommand, ssh.PublicKey) (Instance, error)
// Return all instances, including ones that are booting or
// shutting down. Optionally, filter out nodes that don't have
// all of the given InstanceTags (the caller will ignore these
// anyway).
159 // An instance returned by successive calls to Instances() may
160 // -- but does not need to -- be represented by the same
161 // Instance object each time. Thus, the caller is responsible
162 // for de-duplicating the returned instances by comparing the
163 // InstanceIDs returned by the instances' ID() methods.
164 Instances(InstanceTags) ([]Instance, error)
166 // Return the instance quota group of the given instance type.
167 // See (CapacityError)IsInstanceQuotaGroupSpecific().
168 InstanceQuotaGroup(arvados.InstanceType) InstanceQuotaGroup
170 // Stop any background tasks and release other resources.
174 type InstancePrice struct {
// InitCommand is a command that InstanceSet.Create is asked to run on
// a newly created instance. It is always under 1 KiB.
type InitCommand string
181 // A Driver returns an InstanceSet that uses the given InstanceSetID
182 // and driver-dependent configuration parameters.
184 // If the driver creates cloud resources that aren't attached to a
185 // single VM instance (like SSH key pairs on AWS) and support tagging,
186 // they should be tagged with the provided SharedResourceTags.
188 // The supplied id will be of the form "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
189 // where each z can be any alphanum. The returned InstanceSet must use
190 // this id to tag long-lived cloud resources that it creates, and must
191 // assume control of any existing resources that are tagged with the
192 // same id. Tagging can be accomplished by including the ID in
193 // resource names, using the cloud provider's tagging feature, or any
194 // other mechanism. The tags must be visible to another instance of
195 // the same driver running on a different host.
197 // The returned InstanceSet must not modify or delete cloud resources
198 // unless they are tagged with the given InstanceSetID or the caller
199 // (dispatcher) calls Destroy() on them. It may log a summary of
200 // untagged resources once at startup, though. Thus, two identically
201 // configured InstanceSets running on different hosts with different
202 // ids should log about the existence of each other's resources at
203 // startup, but will not interfere with each other.
205 // The dispatcher always passes the InstanceSetID as a tag when
206 // calling Create() and Instances(), so the driver does not need to
207 // tag/filter VMs by InstanceSetID itself.
//
// Example:
//
//	type exampleInstanceSet struct {
//		// driver-specific configuration fields go here
//	}
//
//	type exampleDriver struct{}
//
//	func (*exampleDriver) InstanceSet(config json.RawMessage, id cloud.InstanceSetID, tags cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) {
//		var is exampleInstanceSet
//		if err := json.Unmarshal(config, &is); err != nil {
//			return nil, err
//		}
//		return &is, nil
//	}
226 type Driver interface {
227 InstanceSet(config json.RawMessage, id InstanceSetID, tags SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (InstanceSet, error)
230 // DriverFunc makes a Driver using the provided function as its
231 // InstanceSet method. This is similar to http.HandlerFunc.
232 func DriverFunc(fn func(config json.RawMessage, id InstanceSetID, tags SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (InstanceSet, error)) Driver {
233 return driverFunc(fn)
236 type driverFunc func(config json.RawMessage, id InstanceSetID, tags SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (InstanceSet, error)
238 func (df driverFunc) InstanceSet(config json.RawMessage, id InstanceSetID, tags SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (InstanceSet, error) {
239 return df(config, id, tags, logger, reg)