1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
17 "git.curoverse.com/arvados.git/lib/cloud"
18 "git.curoverse.com/arvados.git/sdk/go/arvados"
19 "github.com/prometheus/client_golang/prometheus"
20 "github.com/sirupsen/logrus"
21 "golang.org/x/crypto/ssh"
// Instance-tag keys (combined with the configured tagKeyPrefix) used
// to persist pool state on the cloud instances themselves, so a
// restarted dispatcher can recover type, idle behavior, secret, and
// ownership of each instance from its tags.
25 tagKeyInstanceType = "InstanceType"
26 tagKeyIdleBehavior = "IdleBehavior"
27 tagKeyInstanceSecret = "InstanceSecret"
28 tagKeyInstanceSetID = "InstanceSetID"
31 // An InstanceView shows a worker's current state and recent activity.
// It is a read-only JSON-friendly snapshot built by (*Pool)Instances().
// NOTE(review): the closing brace is omitted in this excerpt.
32 type InstanceView struct {
33 Instance cloud.InstanceID `json:"instance"`
34 Address string `json:"address"`
35 Price float64 `json:"price"`
// Arvados-side instance type name vs. the cloud provider's own type name.
36 ArvadosInstanceType string `json:"arvados_instance_type"`
37 ProviderInstanceType string `json:"provider_instance_type"`
38 LastContainerUUID string `json:"last_container_uuid"`
39 LastBusy time.Time `json:"last_busy"`
40 WorkerState string `json:"worker_state"`
41 IdleBehavior IdleBehavior `json:"idle_behavior"`
44 // An Executor executes shell commands on a remote host.
45 type Executor interface {
46 // Run cmd on the current target.
// Execute returns captured stdout/stderr separately; a non-nil err
// covers both connection and non-zero-exit failures.
47 Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
49 // Use the given target for subsequent operations. The new
50 // target is the same host as the previous target, but it
51 // might return a different address and verify a different
54 // SetTarget is called frequently, and in most cases the new
55 // target will behave exactly the same as the old one. An
56 // implementation should optimize accordingly.
58 // SetTarget must not block on concurrent Execute calls.
59 SetTarget(cloud.ExecutorTarget)
// Default tuning values, used when the corresponding cluster config
// entry is left at its zero value (see duration()).
65 defaultSyncInterval = time.Minute
66 defaultProbeInterval = time.Second * 10
67 defaultMaxProbesPerSecond = 10
68 defaultTimeoutIdle = time.Minute
69 defaultTimeoutBooting = time.Minute * 10
70 defaultTimeoutProbe = time.Minute * 10
71 defaultTimeoutShutdown = time.Second * 10
72 defaultTimeoutTERM = time.Minute * 2
73 defaultTimeoutSignal = time.Second * 5
75 // Time after a quota error to try again anyway, even if no
76 // instances have been shutdown.
77 quotaErrorTTL = time.Minute
79 // Time between "X failed because rate limiting" messages
80 logRateLimitErrorInterval = time.Second * 10
// duration returns conf as a time.Duration, falling back to def when
// conf is zero. NOTE(review): the zero-check branch returning def is
// omitted in this excerpt — confirm against the full source.
83 func duration(conf arvados.Duration, def time.Duration) time.Duration {
85 return time.Duration(conf)
91 // NewPool creates a Pool of workers backed by instanceSet.
93 // New instances are configured and set up according to the given
94 // cluster configuration.
//
// Timeouts and intervals fall back to the package defaults via
// duration() when unset in the cluster config. The pool starts its
// background loops lazily via setupOnce.
// NOTE(review): the struct-literal opening (wp := &Pool{...}) and the
// return statement are omitted in this excerpt.
95 func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSetID cloud.InstanceSetID, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
99 instanceSetID: instanceSetID,
// Wrap the provider's InstanceSet so API rate-limit errors can be
// throttled centrally (see throttledInstanceSet usage below).
100 instanceSet: &throttledInstanceSet{InstanceSet: instanceSet},
101 newExecutor: newExecutor,
102 bootProbeCommand: cluster.Containers.CloudVMs.BootProbeCommand,
103 imageID: cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
104 instanceTypes: cluster.InstanceTypes,
105 maxProbesPerSecond: cluster.Containers.CloudVMs.MaxProbesPerSecond,
106 probeInterval: duration(cluster.Containers.CloudVMs.ProbeInterval, defaultProbeInterval),
107 syncInterval: duration(cluster.Containers.CloudVMs.SyncInterval, defaultSyncInterval),
108 timeoutIdle: duration(cluster.Containers.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
109 timeoutBooting: duration(cluster.Containers.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
110 timeoutProbe: duration(cluster.Containers.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
111 timeoutShutdown: duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
112 timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
113 timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
114 installPublicKey: installPublicKey,
115 tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix,
116 stop: make(chan bool),
118 wp.registerMetrics(reg)
// Initialize maps and start background goroutines exactly once.
120 wp.setupOnce.Do(wp.setup)
128 // Pool is a resizable worker pool backed by a cloud.InstanceSet. A
129 // zero Pool should not be used. Call NewPool to create a new Pool.
// NOTE(review): the "type Pool struct {" line, some mutex/sync fields,
// and the closing brace are omitted in this excerpt.
132 logger logrus.FieldLogger
133 arvClient *arvados.Client
134 instanceSetID cloud.InstanceSetID
135 instanceSet *throttledInstanceSet
136 newExecutor func(cloud.Instance) Executor
137 bootProbeCommand string
138 imageID cloud.ImageID
139 instanceTypes map[string]arvados.InstanceType
// Timing knobs, resolved from cluster config in NewPool.
140 syncInterval time.Duration
141 probeInterval time.Duration
142 maxProbesPerSecond int
143 timeoutIdle time.Duration
144 timeoutBooting time.Duration
145 timeoutProbe time.Duration
146 timeoutShutdown time.Duration
147 timeoutTERM time.Duration
148 timeoutSignal time.Duration
149 installPublicKey ssh.PublicKey
// Mutable state below — presumably guarded by the pool's mutex
// (omitted from this excerpt); confirm against the full source.
153 subscribers map[<-chan struct{}]chan<- struct{}
154 creating map[string]createCall // unfinished (cloud.InstanceSet)Create calls (key is instance secret)
155 workers map[cloud.InstanceID]*worker
156 loaded bool // loaded list of instances from InstanceSet at least once
157 exited map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
158 atQuotaUntil time.Time
159 atQuotaErr cloud.QuotaError
164 throttleCreate throttle
165 throttleInstances throttle
// Prometheus metrics registered by registerMetrics().
167 mContainersRunning prometheus.Gauge
168 mInstances *prometheus.GaugeVec
169 mInstancesPrice *prometheus.GaugeVec
170 mVCPUs *prometheus.GaugeVec
171 mMemory *prometheus.GaugeVec
172 mDisappearances *prometheus.CounterVec
// createCall records an in-flight (cloud.InstanceSet)Create call, so
// Unallocated() can count instances that are expected but not yet
// listed by the provider. NOTE(review): the time field and closing
// brace are omitted in this excerpt.
175 type createCall struct {
177 instanceType arvados.InstanceType
180 // Subscribe returns a buffered channel that becomes ready after any
181 // change to the pool's state that could have scheduling implications:
182 // a worker's state changes, a new worker appears, the cloud
183 // provider's API rate limiting period ends, etc.
185 // Additional events that occur while the channel is already ready
186 // will be dropped, so it is OK if the caller services the channel
191 // ch := wp.Subscribe()
192 // defer wp.Unsubscribe(ch)
// NOTE(review): the "return ch" and closing brace are omitted in this
// excerpt.
199 func (wp *Pool) Subscribe() <-chan struct{} {
200 wp.setupOnce.Do(wp.setup)
202 defer wp.mtx.Unlock()
// Buffer of 1 lets notify() do a non-blocking send; see notify().
203 ch := make(chan struct{}, 1)
// The map is keyed by the receive-only view of the same channel, so
// Unsubscribe can accept the value Subscribe returned.
204 wp.subscribers[ch] = ch
208 // Unsubscribe stops sending updates to the given channel.
// ch must be a channel previously returned by Subscribe.
209 func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
210 wp.setupOnce.Do(wp.setup)
212 defer wp.mtx.Unlock()
213 delete(wp.subscribers, ch)
216 // Unallocated returns the number of unallocated (creating + booting +
217 // idle + unknown) workers for each instance type. Workers in
218 // hold/drain mode are not included.
// NOTE(review): several statements (counter increments, the final
// accounting loop body, and the return) are omitted in this excerpt.
219 func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
220 wp.setupOnce.Do(wp.setup)
222 defer wp.mtx.RUnlock()
223 unalloc := map[arvados.InstanceType]int{}
224 creating := map[arvados.InstanceType]int{}
// Track the oldest pending Create call per type, used below to decide
// whether an Unknown-state worker corresponds to a pending Create.
225 oldestCreate := map[arvados.InstanceType]time.Time{}
226 for _, cc := range wp.creating {
227 it := cc.instanceType
229 if t, ok := oldestCreate[it]; !ok || t.After(cc.time) {
230 oldestCreate[it] = cc.time
233 for _, wkr := range wp.workers {
234 // Skip workers that are not expected to become
235 // available soon. Note len(wkr.running)>0 is not
236 // redundant here: it can be true even in
238 if wkr.state == StateShutdown ||
239 wkr.state == StateRunning ||
240 wkr.idleBehavior != IdleBehaviorRun ||
241 len(wkr.running) > 0 {
246 if wkr.state == StateUnknown && creating[it] > 0 && wkr.appeared.After(oldestCreate[it]) {
247 // If up to N new workers appear in
248 // Instances() while we are waiting for N
249 // Create() calls to complete, we assume we're
250 // just seeing a race between Instances() and
251 // Create() responses.
253 // The other common reason why nodes have
254 // state==Unknown is that they appeared at
255 // startup, before any Create calls. They
256 // don't match the above timing condition, so
257 // we never mistakenly attribute them to
258 // pending Create calls.
// Fold remaining (unmatched) pending Create calls into the totals.
262 for it, c := range creating {
268 // Create a new instance with the given type, and add it to the worker
269 // pool. The worker is added immediately; instance creation runs in
272 // Create returns false if a pre-existing error state prevents it from
273 // even attempting to create a new instance. Those errors are logged
274 // by the Pool, so the caller does not need to log anything in such
// NOTE(review): the goroutine wrapper around the Create call, lock
// acquisitions, and return statements are omitted in this excerpt.
276 func (wp *Pool) Create(it arvados.InstanceType) bool {
277 logger := wp.logger.WithField("InstanceType", it.Name)
278 wp.setupOnce.Do(wp.setup)
280 defer wp.mtx.Unlock()
// Refuse early while in a quota backoff window or while Create calls
// are being rate-limit throttled.
281 if time.Now().Before(wp.atQuotaUntil) || wp.throttleCreate.Error() != nil {
// The secret doubles as the pool-side identity of the pending Create
// call and as a tag/boot-probe token on the new instance.
285 secret := randomHex(instanceSecretLength)
286 wp.creating[secret] = createCall{time: now, instanceType: it}
289 tags := cloud.InstanceTags{
290 wp.tagKeyPrefix + tagKeyInstanceSetID: string(wp.instanceSetID),
291 wp.tagKeyPrefix + tagKeyInstanceType: it.Name,
292 wp.tagKeyPrefix + tagKeyIdleBehavior: string(IdleBehaviorRun),
293 wp.tagKeyPrefix + tagKeyInstanceSecret: secret,
// Write the secret to a known file on first boot so the boot probe can
// verify we are talking to the instance we created.
295 initCmd := cloud.InitCommand(fmt.Sprintf("umask 0177 && echo -n %q >%s", secret, instanceSecretFilename))
296 inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
298 defer wp.mtx.Unlock()
299 // delete() is deferred so the updateWorker() call
300 // below knows to use StateBooting when adding a new
302 defer delete(wp.creating, secret)
304 if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
// Back off further Create attempts until quotaErrorTTL elapses, then
// wake subscribers so scheduling can retry.
306 wp.atQuotaUntil = time.Now().Add(quotaErrorTTL)
307 time.AfterFunc(quotaErrorTTL, wp.notify)
309 logger.WithError(err).Error("create failed")
310 wp.instanceSet.throttleCreate.CheckRateLimitError(err, wp.logger, "create instance", wp.notify)
313 wp.updateWorker(inst, it)
318 // AtQuota returns true if Create is not expected to work at the
// moment (i.e., a recent quota error set atQuotaUntil into the
// future). NOTE(review): the lock acquisition line is omitted in this
// excerpt.
320 func (wp *Pool) AtQuota() bool {
322 defer wp.mtx.Unlock()
323 return time.Now().Before(wp.atQuotaUntil)
326 // SetIdleBehavior determines how the indicated instance will behave
327 // when it has no containers running.
// Returns an error if no worker with the given instance ID exists.
328 func (wp *Pool) SetIdleBehavior(id cloud.InstanceID, idleBehavior IdleBehavior) error {
330 defer wp.mtx.Unlock()
331 wkr, ok := wp.workers[id]
333 return errors.New("requested instance does not exist")
// Delegate to the worker, which owns idleBehavior from here on.
335 wkr.setIdleBehavior(idleBehavior)
339 // Add or update worker attached to the given instance.
341 // The second return value is true if a new worker is created.
343 // A newly added instance has state=StateBooting if its tags match an
344 // entry in wp.creating, otherwise StateUnknown.
346 // Caller must have lock.
// NOTE(review): parts of the existing-worker early return, the worker
// struct literal, and the map insert/return are omitted in this
// excerpt.
347 func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType) (*worker, bool) {
348 secret := inst.Tags()[wp.tagKeyPrefix+tagKeyInstanceSecret]
// Wrap the instance so SSH host-key verification can check the
// instance secret (see tagVerifier).
349 inst = tagVerifier{inst, secret}
351 if wkr := wp.workers[id]; wkr != nil {
// Known worker: just point its executor at the (possibly new) address.
352 wkr.executor.SetTarget(inst)
354 wkr.updated = time.Now()
359 state := StateUnknown
360 if _, ok := wp.creating[secret]; ok {
364 // If an instance has a valid IdleBehavior tag when it first
365 // appears, initialize the new worker accordingly (this is how
366 // we restore IdleBehavior that was set by a prior dispatch
367 // process); otherwise, default to "run". After this,
368 // wkr.idleBehavior is the source of truth, and will only be
369 // changed via SetIdleBehavior().
370 idleBehavior := IdleBehavior(inst.Tags()[wp.tagKeyPrefix+tagKeyIdleBehavior])
371 if !validIdleBehavior[idleBehavior] {
372 idleBehavior = IdleBehaviorRun
375 logger := wp.logger.WithFields(logrus.Fields{
376 "InstanceType": it.Name,
377 "Instance": inst.ID(),
378 "Address": inst.Address(),
380 logger.WithFields(logrus.Fields{
382 "IdleBehavior": idleBehavior,
383 }).Infof("instance appeared in cloud")
389 executor: wp.newExecutor(inst),
391 idleBehavior: idleBehavior,
398 running: make(map[string]*remoteRunner),
399 starting: make(map[string]*remoteRunner),
// Capacity-1 channel used as a non-blocking "probe pending" latch.
400 probing: make(chan struct{}, 1),
406 // Shutdown shuts down a worker with the given type, or returns false
407 // if all workers with the given type are busy.
// NOTE(review): the shutdown call, return true, and final return false
// are omitted in this excerpt.
408 func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
409 wp.setupOnce.Do(wp.setup)
411 defer wp.mtx.Unlock()
412 logger := wp.logger.WithField("InstanceType", it.Name)
413 logger.Info("shutdown requested")
// Prefer shutting down a still-booting worker before an idle one.
414 for _, tryState := range []State{StateBooting, StateIdle} {
415 // TODO: shutdown the worker with the longest idle
416 // time (Idle) or the earliest create time (Booting)
417 for _, wkr := range wp.workers {
418 if wkr.idleBehavior != IdleBehaviorHold && wkr.state == tryState && wkr.instType == it {
419 logger.WithField("Instance", wkr.instance).Info("shutting down")
428 // CountWorkers returns the current number of workers in each state.
430 // CountWorkers blocks, if necessary, until the initial instance list
431 // has been loaded from the cloud provider.
// NOTE(review): the waitUntilLoaded call, map construction, loop body,
// and return are omitted in this excerpt.
432 func (wp *Pool) CountWorkers() map[State]int {
433 wp.setupOnce.Do(wp.setup)
436 defer wp.mtx.Unlock()
438 for _, w := range wp.workers {
444 // Running returns the container UUIDs being prepared/run on workers.
446 // In the returned map, the time value indicates when the Pool
447 // observed that the container process had exited. A container that
448 // has not yet exited has a zero time value. The caller should use
449 // KillContainer() to garbage-collect the entries for exited
// NOTE(review): the exited-map merge body and return are omitted in
// this excerpt.
451 func (wp *Pool) Running() map[string]time.Time {
452 wp.setupOnce.Do(wp.setup)
454 defer wp.mtx.Unlock()
455 r := map[string]time.Time{}
456 for _, wkr := range wp.workers {
// Zero time value == "still running / starting, has not exited".
457 for uuid := range wkr.running {
458 r[uuid] = time.Time{}
460 for uuid := range wkr.starting {
461 r[uuid] = time.Time{}
// Overlay recorded exit times for containers whose crunch-run process
// has already exited.
464 for uuid, exited := range wp.exited {
470 // StartContainer starts a container on an idle worker immediately if
471 // possible, otherwise returns false.
// NOTE(review): the wkr declaration, nil-check/return-false, and
// return-true paths are omitted in this excerpt.
472 func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
473 wp.setupOnce.Do(wp.setup)
475 defer wp.mtx.Unlock()
// Pick the most-recently-busy idle worker of the requested type, so
// the longest-idle workers stay idle and become shutdown candidates.
477 for _, w := range wp.workers {
478 if w.instType == it && w.state == StateIdle {
479 if wkr == nil || w.busy.After(wkr.busy) {
487 wkr.startContainer(ctr)
491 // KillContainer kills the crunch-run process for the given container
492 // UUID, if it's running on any worker.
494 // KillContainer returns immediately; the act of killing the container
495 // takes some time, and runs in the background.
// NOTE(review): the lock acquisition, the kill dispatch after finding
// the runner, and intermediate returns are omitted in this excerpt.
496 func (wp *Pool) KillContainer(uuid string, reason string) {
498 defer wp.mtx.Unlock()
499 logger := wp.logger.WithFields(logrus.Fields{
500 "ContainerUUID": uuid,
// If the process already exited, this call just garbage-collects the
// placeholder entry recorded in wp.exited (see Running()).
503 if _, ok := wp.exited[uuid]; ok {
504 logger.Debug("clearing placeholder for exited crunch-run process")
505 delete(wp.exited, uuid)
// Look for the container's runner among running, then starting.
508 for _, wkr := range wp.workers {
509 rr := wkr.running[uuid]
511 rr = wkr.starting[uuid]
518 logger.Debug("cannot kill: already disappeared")
// registerMetrics creates and registers the pool's Prometheus metrics
// on reg. A nil reg is replaced with a throwaway registry so the
// metric fields are still non-nil and safe to update.
521 func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
523 reg = prometheus.NewRegistry()
525 wp.mContainersRunning = prometheus.NewGauge(prometheus.GaugeOpts{
526 Namespace: "arvados",
527 Subsystem: "dispatchcloud",
528 Name: "containers_running",
529 Help: "Number of containers reported running by cloud VMs.",
531 reg.MustRegister(wp.mContainersRunning)
532 wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
533 Namespace: "arvados",
534 Subsystem: "dispatchcloud",
535 Name: "instances_total",
536 Help: "Number of cloud VMs.",
537 }, []string{"category"})
538 reg.MustRegister(wp.mInstances)
539 wp.mInstancesPrice = prometheus.NewGaugeVec(prometheus.GaugeOpts{
540 Namespace: "arvados",
541 Subsystem: "dispatchcloud",
542 Name: "instances_price",
543 Help: "Price of cloud VMs.",
544 }, []string{"category"})
545 reg.MustRegister(wp.mInstancesPrice)
546 wp.mVCPUs = prometheus.NewGaugeVec(prometheus.GaugeOpts{
547 Namespace: "arvados",
548 Subsystem: "dispatchcloud",
550 Help: "Total VCPUs on all cloud VMs.",
551 }, []string{"category"})
552 reg.MustRegister(wp.mVCPUs)
553 wp.mMemory = prometheus.NewGaugeVec(prometheus.GaugeOpts{
554 Namespace: "arvados",
555 Subsystem: "dispatchcloud",
556 Name: "memory_bytes_total",
557 Help: "Total memory on all cloud VMs.",
558 }, []string{"category"})
559 reg.MustRegister(wp.mMemory)
560 wp.mDisappearances = prometheus.NewCounterVec(prometheus.CounterOpts{
561 Namespace: "arvados",
562 Subsystem: "dispatchcloud",
563 Name: "instances_disappeared",
564 Help: "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
565 }, []string{"state"})
// Pre-seed every state label at 0 so rate() works from the first
// increment.
566 for _, v := range stateString {
567 wp.mDisappearances.WithLabelValues(v).Add(0)
569 reg.MustRegister(wp.mDisappearances)
// runMetrics subscribes to pool updates and refreshes the Prometheus
// metrics on each notification. NOTE(review): the Subscribe call and
// the update loop are omitted in this excerpt.
572 func (wp *Pool) runMetrics() {
574 defer wp.Unsubscribe(ch)
// updateMetrics recomputes all per-category gauges and the running-
// containers gauge from the current worker set. Caller-side locking is
// via the RLock acquired here. NOTE(review): the lock acquisition, the
// "cat" assignments in the switch arms, and the instances[cat]
// increment are omitted in this excerpt.
581 func (wp *Pool) updateMetrics() {
583 defer wp.mtx.RUnlock()
585 instances := map[string]int64{}
586 price := map[string]float64{}
587 cpu := map[string]int64{}
588 mem := map[string]int64{}
590 for _, wkr := range wp.workers {
// Categorize each worker: inuse > hold > booting > unknown > idle
// (order of the switch arms determines precedence).
593 case len(wkr.running)+len(wkr.starting) > 0:
595 case wkr.idleBehavior == IdleBehaviorHold:
597 case wkr.state == StateBooting:
599 case wkr.state == StateUnknown:
605 price[cat] += wkr.instType.Price
606 cpu[cat] += int64(wkr.instType.VCPUs)
607 mem[cat] += int64(wkr.instType.RAM)
608 running += int64(len(wkr.running) + len(wkr.starting))
// Always write every category, so gauges drop back to zero when a
// category empties out.
610 for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
611 wp.mInstances.WithLabelValues(cat).Set(float64(instances[cat]))
612 wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
613 wp.mVCPUs.WithLabelValues(cat).Set(float64(cpu[cat]))
614 wp.mMemory.WithLabelValues(cat).Set(float64(mem[cat]))
616 wp.mContainersRunning.Set(float64(running))
// runProbes periodically probes all workers, rate-limited to maxPPS
// probes per second overall. NOTE(review): lock acquisitions, the
// stop-channel select, and loop-closing braces are omitted in this
// excerpt.
619 func (wp *Pool) runProbes() {
620 maxPPS := wp.maxProbesPerSecond
622 maxPPS = defaultMaxProbesPerSecond
// limitticker spaces out individual probes; probeticker triggers each
// full sweep of the worker list.
624 limitticker := time.NewTicker(time.Second / time.Duration(maxPPS))
625 defer limitticker.Stop()
627 probeticker := time.NewTicker(wp.probeInterval)
628 defer probeticker.Stop()
// Reused across sweeps to avoid reallocating the ID list each tick.
630 workers := []cloud.InstanceID{}
631 for range probeticker.C {
632 workers = workers[:0]
634 for id, wkr := range wp.workers {
// Skip (and opportunistically shut down) workers that are already
// shutting down or have been idle too long.
635 if wkr.state == StateShutdown || wkr.shutdownIfIdle() {
638 workers = append(workers, id)
642 for _, id := range workers {
644 wkr, ok := wp.workers[id]
647 // Deleted while we were probing
651 go wkr.ProbeAndUpdate()
655 case <-limitticker.C:
// runSync keeps the worker list in sync with the cloud provider's
// instance list until Stop is called. NOTE(review): the select on the
// stop channel and timer drain are omitted in this excerpt.
661 func (wp *Pool) runSync() {
662 // sync once immediately, then wait syncInterval, sync again,
// Timer with a 1ns delay fires (nearly) immediately for the first sync.
664 timer := time.NewTimer(1)
668 err := wp.getInstancesAndSync()
670 wp.logger.WithError(err).Warn("sync failed")
672 timer.Reset(wp.syncInterval)
674 wp.logger.Debug("worker.Pool stopped")
680 // Stop synchronizing with the InstanceSet.
// NOTE(review): the close(wp.stop) (or equivalent) statement is
// omitted in this excerpt.
681 func (wp *Pool) Stop() {
682 wp.setupOnce.Do(wp.setup)
686 // Instances returns an InstanceView for each worker in the pool,
687 // summarizing its current state and recent activity.
// The result is sorted by instance ID for stable output.
// NOTE(review): the lock acquisition, the r declaration, LastBusy
// assignment, and return are omitted in this excerpt.
688 func (wp *Pool) Instances() []InstanceView {
690 wp.setupOnce.Do(wp.setup)
692 for _, w := range wp.workers {
693 r = append(r, InstanceView{
694 Instance: w.instance.ID(),
695 Address: w.instance.Address(),
696 Price: w.instType.Price,
697 ArvadosInstanceType: w.instType.Name,
698 ProviderInstanceType: w.instType.ProviderType,
699 LastContainerUUID: w.lastUUID,
701 WorkerState: w.state.String(),
702 IdleBehavior: w.idleBehavior,
706 sort.Slice(r, func(i, j int) bool {
707 return strings.Compare(string(r[i].Instance), string(r[j].Instance)) < 0
712 // KillInstance destroys a cloud VM instance. It returns an error if
713 // the given instance does not exist.
// NOTE(review): the shutdown invocation and nil return are omitted in
// this excerpt.
714 func (wp *Pool) KillInstance(id cloud.InstanceID, reason string) error {
715 wkr, ok := wp.workers[id]
717 return errors.New("instance not found")
719 wkr.logger.WithField("Reason", reason).Info("shutting down")
// setup initializes the pool's maps. It runs exactly once, via
// wp.setupOnce.Do(wp.setup) at the top of every public method.
// NOTE(review): the closing brace is omitted in this excerpt.
724 func (wp *Pool) setup() {
725 wp.creating = map[string]createCall{}
726 wp.exited = map[string]time.Time{}
727 wp.workers = map[cloud.InstanceID]*worker{}
728 wp.subscribers = map[<-chan struct{}]chan<- struct{}{}
// notify does a non-blocking send on every subscriber channel;
// subscribers whose buffered channel is already ready are skipped
// (events coalesce — see Subscribe). NOTE(review): the RLock call and
// the select's default case are omitted in this excerpt.
731 func (wp *Pool) notify() {
733 defer wp.mtx.RUnlock()
734 for _, send := range wp.subscribers {
736 case send <- struct{}{}:
// getInstancesAndSync fetches the current instance list from the
// InstanceSet (filtered to this pool's InstanceSetID tag) and
// reconciles wp.workers against it. Returns early with an error while
// instance-list calls are rate-limit throttled. NOTE(review): error
// returns and the final nil return are omitted in this excerpt.
742 func (wp *Pool) getInstancesAndSync() error {
743 wp.setupOnce.Do(wp.setup)
744 if err := wp.instanceSet.throttleInstances.Error(); err != nil {
747 wp.logger.Debug("getting instance list")
// threshold is captured before the (slow) Instances call so sync()
// won't clobber worker updates that happen during the call.
748 threshold := time.Now()
749 instances, err := wp.instanceSet.Instances(cloud.InstanceTags{wp.tagKeyPrefix + tagKeyInstanceSetID: string(wp.instanceSetID)})
751 wp.instanceSet.throttleInstances.CheckRateLimitError(err, wp.logger, "list instances", wp.notify)
754 wp.sync(threshold, instances)
755 wp.logger.Debug("sync done")
759 // Add/remove/update workers based on instances, which was obtained
760 // from the instanceSet. However, don't clobber any other updates that
761 // already happened after threshold.
// NOTE(review): the lock acquisition, the notify bookkeeping, the
// shutdown retry call, and the loaded-flag handling are omitted in
// this excerpt.
762 func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
764 defer wp.mtx.Unlock()
765 wp.logger.WithField("Instances", len(instances)).Debug("sync instances")
768 for _, inst := range instances {
769 itTag := inst.Tags()[wp.tagKeyPrefix+tagKeyInstanceType]
770 it, ok := wp.instanceTypes[itTag]
// Instances whose type tag doesn't match any configured InstanceType
// are logged and ignored rather than adopted.
772 wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
775 if wkr, isNew := wp.updateWorker(inst, it); isNew {
777 } else if wkr.state == StateShutdown && time.Since(wkr.destroyed) > wp.timeoutShutdown {
778 wp.logger.WithField("Instance", inst).Info("worker still listed after shutdown; retrying")
// Second pass: drop workers the provider no longer lists, unless they
// were updated after threshold (i.e., during the Instances() call).
783 for id, wkr := range wp.workers {
784 if wkr.updated.After(threshold) {
787 logger := wp.logger.WithFields(logrus.Fields{
788 "Instance": wkr.instance.ID(),
789 "WorkerState": wkr.state,
791 logger.Info("instance disappeared in cloud")
792 if wp.mDisappearances != nil {
793 wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc()
795 delete(wp.workers, id)
803 wp.logger.WithField("N", len(wp.workers)).Info("loaded initial instance list")
// waitUntilLoaded blocks until the initial instance list has been
// loaded from the provider (wp.loaded). Used by CountWorkers.
// NOTE(review): the subscribe/wait loop is omitted in this excerpt.
811 func (wp *Pool) waitUntilLoaded() {
814 defer wp.mtx.RUnlock()
822 // Return a random string of n hexadecimal digits (n*4 random bits). n
// must be even (n/2 bytes are read from crypto/rand).
// NOTE(review): the rand.Read error handling (between lines 826 and
// 830) is omitted in this excerpt — confirm against the full source.
824 func randomHex(n int) string {
825 buf := make([]byte, n/2)
826 _, err := rand.Read(buf)
830 return fmt.Sprintf("%x", buf)