16739: Merge branch 'master' into 16739-concurrent-node-create-throttle
authorWard Vandewege <ward@curii.com>
Tue, 1 Sep 2020 14:33:27 +0000 (10:33 -0400)
committerWard Vandewege <ward@curii.com>
Tue, 1 Sep 2020 14:33:48 +0000 (10:33 -0400)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
lib/config/config.default.yml
lib/config/generated_config.go
lib/dispatchcloud/worker/pool.go
lib/dispatchcloud/worker/pool_test.go
sdk/go/arvados/config.go

index 68417784701ce387e7437bb0f0b8e62a2335e5ff..151e211653c0d77d73af31749bf71836124998e1 100644 (file)
@@ -100,6 +100,9 @@ Using managed disks:
       CloudVMs:
         ImageID: "zzzzz-compute-v1597349873"
         Driver: azure
+        # (azure) managed disks: set MaxConcurrentInstanceCreateOps to 20 to avoid timeouts, cf
+        # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image
+        MaxConcurrentInstanceCreateOps: 20
         DriverParameters:
           # Credentials.
           SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
index 80294afaf35f1f701928f0c1ce99c3c09ca35e09..57baf534277625aa2566baeb89b2dff70682f0a0 100644 (file)
@@ -945,6 +945,12 @@ Clusters:
         # unlimited).
         MaxCloudOpsPerSecond: 0
 
+        # Maximum concurrent node creation operations (0 = unlimited). This is
+        # recommended by Azure in certain scenarios (see
+        # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image)
+        # and can be used with other cloud providers too, if desired.
+        MaxConcurrentInstanceCreateOps: 0
+
         # Interval between cloud provider syncs/updates ("list all
         # instances").
         SyncInterval: 1m
index 57204cf36a2dbe49731c2d7cc32ad51f09522f0a..3df644cf6ea56ef686f12abb1b51c599c3825fe8 100644 (file)
@@ -951,6 +951,12 @@ Clusters:
         # unlimited).
         MaxCloudOpsPerSecond: 0
 
+        # Maximum concurrent node creation operations (0 = unlimited). This is
+        # recommended by Azure in certain scenarios (see
+        # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image)
+        # and can be used with other cloud providers too, if desired.
+        MaxConcurrentInstanceCreateOps: 0
+
         # Interval between cloud provider syncs/updates ("list all
         # instances").
         SyncInterval: 1m
index 12bc1cdd71636263cebc0c8f21bd283d791aec04..435b6e43ae4a3e150f680883aa7e124daf6e4230 100644 (file)
@@ -96,27 +96,28 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
 // cluster configuration.
 func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSetID cloud.InstanceSetID, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
        wp := &Pool{
-               logger:             logger,
-               arvClient:          arvClient,
-               instanceSetID:      instanceSetID,
-               instanceSet:        &throttledInstanceSet{InstanceSet: instanceSet},
-               newExecutor:        newExecutor,
-               bootProbeCommand:   cluster.Containers.CloudVMs.BootProbeCommand,
-               runnerSource:       cluster.Containers.CloudVMs.DeployRunnerBinary,
-               imageID:            cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
-               instanceTypes:      cluster.InstanceTypes,
-               maxProbesPerSecond: cluster.Containers.CloudVMs.MaxProbesPerSecond,
-               probeInterval:      duration(cluster.Containers.CloudVMs.ProbeInterval, defaultProbeInterval),
-               syncInterval:       duration(cluster.Containers.CloudVMs.SyncInterval, defaultSyncInterval),
-               timeoutIdle:        duration(cluster.Containers.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
-               timeoutBooting:     duration(cluster.Containers.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
-               timeoutProbe:       duration(cluster.Containers.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
-               timeoutShutdown:    duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
-               timeoutTERM:        duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
-               timeoutSignal:      duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
-               installPublicKey:   installPublicKey,
-               tagKeyPrefix:       cluster.Containers.CloudVMs.TagKeyPrefix,
-               stop:               make(chan bool),
+               logger:                         logger,
+               arvClient:                      arvClient,
+               instanceSetID:                  instanceSetID,
+               instanceSet:                    &throttledInstanceSet{InstanceSet: instanceSet},
+               newExecutor:                    newExecutor,
+               bootProbeCommand:               cluster.Containers.CloudVMs.BootProbeCommand,
+               runnerSource:                   cluster.Containers.CloudVMs.DeployRunnerBinary,
+               imageID:                        cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
+               instanceTypes:                  cluster.InstanceTypes,
+               maxProbesPerSecond:             cluster.Containers.CloudVMs.MaxProbesPerSecond,
+               maxConcurrentInstanceCreateOps: cluster.Containers.CloudVMs.MaxConcurrentInstanceCreateOps,
+               probeInterval:                  duration(cluster.Containers.CloudVMs.ProbeInterval, defaultProbeInterval),
+               syncInterval:                   duration(cluster.Containers.CloudVMs.SyncInterval, defaultSyncInterval),
+               timeoutIdle:                    duration(cluster.Containers.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
+               timeoutBooting:                 duration(cluster.Containers.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
+               timeoutProbe:                   duration(cluster.Containers.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
+               timeoutShutdown:                duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+               timeoutTERM:                    duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
+               timeoutSignal:                  duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
+               installPublicKey:               installPublicKey,
+               tagKeyPrefix:                   cluster.Containers.CloudVMs.TagKeyPrefix,
+               stop:                           make(chan bool),
        }
        wp.registerMetrics(reg)
        go func() {
@@ -132,26 +133,27 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
 // zero Pool should not be used. Call NewPool to create a new Pool.
 type Pool struct {
        // configuration
-       logger             logrus.FieldLogger
-       arvClient          *arvados.Client
-       instanceSetID      cloud.InstanceSetID
-       instanceSet        *throttledInstanceSet
-       newExecutor        func(cloud.Instance) Executor
-       bootProbeCommand   string
-       runnerSource       string
-       imageID            cloud.ImageID
-       instanceTypes      map[string]arvados.InstanceType
-       syncInterval       time.Duration
-       probeInterval      time.Duration
-       maxProbesPerSecond int
-       timeoutIdle        time.Duration
-       timeoutBooting     time.Duration
-       timeoutProbe       time.Duration
-       timeoutShutdown    time.Duration
-       timeoutTERM        time.Duration
-       timeoutSignal      time.Duration
-       installPublicKey   ssh.PublicKey
-       tagKeyPrefix       string
+       logger                         logrus.FieldLogger
+       arvClient                      *arvados.Client
+       instanceSetID                  cloud.InstanceSetID
+       instanceSet                    *throttledInstanceSet
+       newExecutor                    func(cloud.Instance) Executor
+       bootProbeCommand               string
+       runnerSource                   string
+       imageID                        cloud.ImageID
+       instanceTypes                  map[string]arvados.InstanceType
+       syncInterval                   time.Duration
+       probeInterval                  time.Duration
+       maxProbesPerSecond             int
+       maxConcurrentInstanceCreateOps int
+       timeoutIdle                    time.Duration
+       timeoutBooting                 time.Duration
+       timeoutProbe                   time.Duration
+       timeoutShutdown                time.Duration
+       timeoutTERM                    time.Duration
+       timeoutSignal                  time.Duration
+       installPublicKey               ssh.PublicKey
+       tagKeyPrefix                   string
 
        // private state
        subscribers  map[<-chan struct{}]chan<- struct{}
@@ -168,9 +170,6 @@ type Pool struct {
        runnerMD5    [md5.Size]byte
        runnerCmd    string
 
-       throttleCreate    throttle
-       throttleInstances throttle
-
        mContainersRunning prometheus.Gauge
        mInstances         *prometheus.GaugeVec
        mInstancesPrice    *prometheus.GaugeVec
@@ -298,7 +297,19 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
        }
        wp.mtx.Lock()
        defer wp.mtx.Unlock()
-       if time.Now().Before(wp.atQuotaUntil) || wp.throttleCreate.Error() != nil {
+       if time.Now().Before(wp.atQuotaUntil) || wp.instanceSet.throttleCreate.Error() != nil {
+               return false
+       }
+       // The maxConcurrentInstanceCreateOps knob throttles the number of node create
+       // requests in flight. It was added to work around a limitation in Azure's
+       // managed disks, which support no more than 20 concurrent node creation
+       // requests from a single disk image (cf.
+       // https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image).
+       // The code assumes that node creation, from Azure's perspective, means the
+       // period until the instance appears in the "get all instances" list.
+       if wp.maxConcurrentInstanceCreateOps > 0 && len(wp.creating) >= wp.maxConcurrentInstanceCreateOps {
+               logger.Info("reached MaxConcurrentInstanceCreateOps")
+               wp.instanceSet.throttleCreate.ErrorUntil(errors.New("reached MaxConcurrentInstanceCreateOps"), time.Now().Add(5*time.Second), wp.notify)
                return false
        }
        now := time.Now()
index 0c173c107d4a248ec38ca635f5fa0ac219af6a4b..a85f7383ab3cdc59fcc1bd0e7ad936703666ca2f 100644 (file)
@@ -199,6 +199,46 @@ func (suite *PoolSuite) TestDrain(c *check.C) {
        }
 }
 
+func (suite *PoolSuite) TestNodeCreateThrottle(c *check.C) {
+       logger := ctxlog.TestLogger(c)
+       driver := test.StubDriver{HoldCloudOps: true}
+       instanceSet, err := driver.InstanceSet(nil, "test-instance-set-id", nil, logger)
+       c.Assert(err, check.IsNil)
+
+       type1 := test.InstanceType(1)
+       pool := &Pool{
+               logger:                         logger,
+               instanceSet:                    &throttledInstanceSet{InstanceSet: instanceSet},
+               maxConcurrentInstanceCreateOps: 1,
+               instanceTypes: arvados.InstanceTypeMap{
+                       type1.Name: type1,
+               },
+       }
+
+       c.Check(pool.Unallocated()[type1], check.Equals, 0)
+       res := pool.Create(type1)
+       c.Check(pool.Unallocated()[type1], check.Equals, 1)
+       c.Check(res, check.Equals, true)
+
+       res = pool.Create(type1)
+       c.Check(pool.Unallocated()[type1], check.Equals, 1)
+       c.Check(res, check.Equals, false)
+
+       pool.instanceSet.throttleCreate.err = nil
+       pool.maxConcurrentInstanceCreateOps = 2
+
+       res = pool.Create(type1)
+       c.Check(pool.Unallocated()[type1], check.Equals, 2)
+       c.Check(res, check.Equals, true)
+
+       pool.instanceSet.throttleCreate.err = nil
+       pool.maxConcurrentInstanceCreateOps = 0
+
+       res = pool.Create(type1)
+       c.Check(pool.Unallocated()[type1], check.Equals, 3)
+       c.Check(res, check.Equals, true)
+}
+
 func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
        logger := ctxlog.TestLogger(c)
        driver := test.StubDriver{HoldCloudOps: true}
index 6e1549224b79c15e3674b76091c06a229589f16d..829d4097c8496daea349d3fe290367ff2e8c9eda 100644 (file)
@@ -446,23 +446,24 @@ type ContainersConfig struct {
 type CloudVMsConfig struct {
        Enable bool
 
-       BootProbeCommand     string
-       DeployRunnerBinary   string
-       ImageID              string
-       MaxCloudOpsPerSecond int
-       MaxProbesPerSecond   int
-       PollInterval         Duration
-       ProbeInterval        Duration
-       SSHPort              string
-       SyncInterval         Duration
-       TimeoutBooting       Duration
-       TimeoutIdle          Duration
-       TimeoutProbe         Duration
-       TimeoutShutdown      Duration
-       TimeoutSignal        Duration
-       TimeoutTERM          Duration
-       ResourceTags         map[string]string
-       TagKeyPrefix         string
+       BootProbeCommand               string
+       DeployRunnerBinary             string
+       ImageID                        string
+       MaxCloudOpsPerSecond           int
+       MaxProbesPerSecond             int
+       MaxConcurrentInstanceCreateOps int
+       PollInterval                   Duration
+       ProbeInterval                  Duration
+       SSHPort                        string
+       SyncInterval                   Duration
+       TimeoutBooting                 Duration
+       TimeoutIdle                    Duration
+       TimeoutProbe                   Duration
+       TimeoutShutdown                Duration
+       TimeoutSignal                  Duration
+       TimeoutTERM                    Duration
+       ResourceTags                   map[string]string
+       TagKeyPrefix                   string
 
        Driver           string
        DriverParameters json.RawMessage