CloudVMs:
ImageID: "zzzzz-compute-v1597349873"
Driver: azure
+ # (azure) managed disks: set MaxConcurrentInstanceCreateOps to 20 to avoid timeouts; cf.
+ # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image
+ MaxConcurrentInstanceCreateOps: 20
DriverParameters:
# Credentials.
SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
# unlimited).
MaxCloudOpsPerSecond: 0
+ # Maximum concurrent node creation operations (0 = unlimited). Azure
+ # recommends a limit when many VMs are created from a single managed
+ # disk image (see
+ # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image),
+ # and the setting can be used with other cloud providers too, if desired.
+ MaxConcurrentInstanceCreateOps: 0
+
# Interval between cloud provider syncs/updates ("list all
# instances").
SyncInterval: 1m
VocabularyURL: ""
FileViewersConfigURL: ""
+ # Idle time after which the user's session will be automatically
+ # closed. This feature is disabled when set to zero.
+ IdleTimeout: 0s
+
# Workbench welcome screen: HTML text that will be
# incorporated directly onto the page.
WelcomePageHTML: |
"Login.Test": true,
"Login.Test.Enable": true,
"Login.Test.Users": false,
- "Login.TokenLifetime": false,
+ "Login.TokenLifetime": true,
"Mail": true,
"Mail.EmailFrom": false,
"Mail.IssueReporterEmailFrom": false,
"Workbench.EnableGettingStartedPopup": true,
"Workbench.EnablePublicProjectsPage": true,
"Workbench.FileViewersConfigURL": true,
+ "Workbench.IdleTimeout": true,
"Workbench.InactivePageHTML": true,
"Workbench.LogViewerMaxBytes": true,
"Workbench.MultiSiteSearch": true,
# unlimited).
MaxCloudOpsPerSecond: 0
+ # Maximum concurrent node creation operations (0 = unlimited). Azure
+ # recommends a limit when many VMs are created from a single managed
+ # disk image (see
+ # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image),
+ # and the setting can be used with other cloud providers too, if desired.
+ MaxConcurrentInstanceCreateOps: 0
+
# Interval between cloud provider syncs/updates ("list all
# instances").
SyncInterval: 1m
VocabularyURL: ""
FileViewersConfigURL: ""
+ # Idle time after which the user's session will be automatically
+ # closed. This feature is disabled when set to zero.
+ IdleTimeout: 0s
+
# Workbench welcome screen: HTML text that will be
# incorporated directly onto the page.
WelcomePageHTML: |
if unalloc[it] > 0 {
unalloc[it]--
} else if sch.pool.AtQuota() {
- logger.Debug("not starting: AtQuota and no unalloc workers")
+ // Don't let lower-priority containers
+ // starve this one by keeping idle
+ // workers alive on different
+ // instance types.
+ logger.Debug("unlocking: AtQuota and no unalloc workers")
+ sch.queue.Unlock(ctr.UUID)
overquota = sorted[i:]
break tryrun
+ } else if logger.Info("creating new instance"); sch.pool.Create(it) {
+ // Success. (Note pool.Create works
+ // asynchronously and does its own
+ // logging, so we don't need to.)
} else {
- logger.Info("creating new instance")
- if !sch.pool.Create(it) {
- // (Note pool.Create works
- // asynchronously and logs its
- // own failures, so we don't
- // need to log this as a
- // failure.)
-
- sch.queue.Unlock(ctr.UUID)
- // Don't let lower-priority
- // containers starve this one
- // by using keeping idle
- // workers alive on different
- // instance types. TODO:
- // avoid getting starved here
- // if instances of a specific
- // type always fail.
- overquota = sorted[i:]
- break tryrun
- }
+ // Failed despite not being at quota,
+ // e.g., cloud ops throttled. TODO:
+ // avoid getting starved here if
+ // instances of a specific type always
+ // fail.
+ continue
}
if dontstart[it] {
idle map[arvados.InstanceType]int
unknown map[arvados.InstanceType]int
running map[string]time.Time
- atQuota bool
+ quota int
canCreate int
creates []arvados.InstanceType
starts []string
sync.Mutex
}
-func (p *stubPool) AtQuota() bool { return p.atQuota }
+func (p *stubPool) AtQuota() bool {
+ p.Lock()
+ defer p.Unlock()
+ return len(p.unalloc)+len(p.running)+len(p.unknown) >= p.quota
+}
func (p *stubPool) Subscribe() <-chan struct{} { return p.notify }
func (p *stubPool) Unsubscribe(<-chan struct{}) {}
func (p *stubPool) Running() map[string]time.Time {
type SchedulerSuite struct{}
-// Assign priority=4 container to idle node. Create a new instance for
-// the priority=3 container. Don't try to start any priority<3
-// containers because priority=3 container didn't start
-// immediately. Don't try to create any other nodes after the failed
-// create.
+// Assign priority=4 container to idle node. Create new instances for
+// the priority=3, 2, 1 containers.
func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
queue := test.Queue{
}
queue.Update()
pool := stubPool{
+ quota: 1000,
unalloc: map[arvados.InstanceType]int{
test.InstanceType(1): 1,
test.InstanceType(2): 2,
canCreate: 0,
}
New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
- c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
+ c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1), test.InstanceType(1), test.InstanceType(1)})
c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
c.Check(pool.running, check.HasLen, 1)
for uuid := range pool.running {
}
}
-// If Create() fails, shutdown some nodes, and don't call Create()
-// again. Don't call Create() at all if AtQuota() is true.
+// If pool.AtQuota() is true, shut down some unalloc nodes, and don't
+// call Create().
func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
- for quota := 0; quota < 2; quota++ {
+ for quota := 1; quota < 3; quota++ {
c.Logf("quota=%d", quota)
shouldCreate := []arvados.InstanceType{}
- for i := 0; i < quota; i++ {
+ for i := 1; i < quota; i++ {
shouldCreate = append(shouldCreate, test.InstanceType(3))
}
queue := test.Queue{
}
queue.Update()
pool := stubPool{
- atQuota: quota == 0,
+ quota: quota,
unalloc: map[arvados.InstanceType]int{
test.InstanceType(2): 2,
},
}
New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
c.Check(pool.creates, check.DeepEquals, shouldCreate)
- c.Check(pool.starts, check.DeepEquals, []string{})
- c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+ if len(shouldCreate) == 0 {
+ c.Check(pool.starts, check.DeepEquals, []string{})
+ c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+ } else {
+ c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
+ c.Check(pool.shutdowns, check.Equals, 0)
+ }
}
}
func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
pool := stubPool{
+ quota: 1000,
unalloc: map[arvados.InstanceType]int{
test.InstanceType(1): 2,
test.InstanceType(2): 2,
func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) {
ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
pool := stubPool{
+ quota: 1000,
unalloc: map[arvados.InstanceType]int{
test.InstanceType(2): 0,
},
svm.Lock()
defer svm.Unlock()
if svm.running[uuid] != pid {
- if !completed {
- bugf := svm.sis.driver.Bugf
- if bugf == nil {
- bugf = logger.Warnf
- }
- bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid])
+ bugf := svm.sis.driver.Bugf
+ if bugf == nil {
+ bugf = logger.Warnf
}
+ bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid])
} else {
delete(svm.running, uuid)
}
time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
svm.Lock()
- killed := svm.running[uuid] != pid
+ killed := svm.killing[uuid]
svm.Unlock()
if killed || wantCrashEarly {
return
}
if strings.HasPrefix(command, "crunch-run --kill ") {
svm.Lock()
- pid, running := svm.running[uuid]
- if running && !svm.killing[uuid] {
+ _, running := svm.running[uuid]
+ if running {
svm.killing[uuid] = true
- go func() {
- time.Sleep(time.Duration(math_rand.Float64()*30) * time.Millisecond)
- svm.Lock()
- defer svm.Unlock()
- if svm.running[uuid] == pid {
- // Kill only if the running entry
- // hasn't since been killed and
- // replaced with a different one.
- delete(svm.running, uuid)
- }
- delete(svm.killing, uuid)
- }()
svm.Unlock()
time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond)
svm.Lock()
// cluster configuration.
func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSetID cloud.InstanceSetID, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
wp := &Pool{
- logger: logger,
- arvClient: arvClient,
- instanceSetID: instanceSetID,
- instanceSet: &throttledInstanceSet{InstanceSet: instanceSet},
- newExecutor: newExecutor,
- bootProbeCommand: cluster.Containers.CloudVMs.BootProbeCommand,
- runnerSource: cluster.Containers.CloudVMs.DeployRunnerBinary,
- imageID: cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
- instanceTypes: cluster.InstanceTypes,
- maxProbesPerSecond: cluster.Containers.CloudVMs.MaxProbesPerSecond,
- probeInterval: duration(cluster.Containers.CloudVMs.ProbeInterval, defaultProbeInterval),
- syncInterval: duration(cluster.Containers.CloudVMs.SyncInterval, defaultSyncInterval),
- timeoutIdle: duration(cluster.Containers.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
- timeoutBooting: duration(cluster.Containers.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
- timeoutProbe: duration(cluster.Containers.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
- timeoutShutdown: duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
- timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
- timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
- installPublicKey: installPublicKey,
- tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix,
- stop: make(chan bool),
+ logger: logger,
+ arvClient: arvClient,
+ instanceSetID: instanceSetID,
+ instanceSet: &throttledInstanceSet{InstanceSet: instanceSet},
+ newExecutor: newExecutor,
+ bootProbeCommand: cluster.Containers.CloudVMs.BootProbeCommand,
+ runnerSource: cluster.Containers.CloudVMs.DeployRunnerBinary,
+ imageID: cloud.ImageID(cluster.Containers.CloudVMs.ImageID),
+ instanceTypes: cluster.InstanceTypes,
+ maxProbesPerSecond: cluster.Containers.CloudVMs.MaxProbesPerSecond,
+ maxConcurrentInstanceCreateOps: cluster.Containers.CloudVMs.MaxConcurrentInstanceCreateOps,
+ probeInterval: duration(cluster.Containers.CloudVMs.ProbeInterval, defaultProbeInterval),
+ syncInterval: duration(cluster.Containers.CloudVMs.SyncInterval, defaultSyncInterval),
+ timeoutIdle: duration(cluster.Containers.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
+ timeoutBooting: duration(cluster.Containers.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
+ timeoutProbe: duration(cluster.Containers.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
+ timeoutShutdown: duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+ timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
+ timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
+ installPublicKey: installPublicKey,
+ tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix,
+ stop: make(chan bool),
}
wp.registerMetrics(reg)
go func() {
// zero Pool should not be used. Call NewPool to create a new Pool.
type Pool struct {
// configuration
- logger logrus.FieldLogger
- arvClient *arvados.Client
- instanceSetID cloud.InstanceSetID
- instanceSet *throttledInstanceSet
- newExecutor func(cloud.Instance) Executor
- bootProbeCommand string
- runnerSource string
- imageID cloud.ImageID
- instanceTypes map[string]arvados.InstanceType
- syncInterval time.Duration
- probeInterval time.Duration
- maxProbesPerSecond int
- timeoutIdle time.Duration
- timeoutBooting time.Duration
- timeoutProbe time.Duration
- timeoutShutdown time.Duration
- timeoutTERM time.Duration
- timeoutSignal time.Duration
- installPublicKey ssh.PublicKey
- tagKeyPrefix string
+ logger logrus.FieldLogger
+ arvClient *arvados.Client
+ instanceSetID cloud.InstanceSetID
+ instanceSet *throttledInstanceSet
+ newExecutor func(cloud.Instance) Executor
+ bootProbeCommand string
+ runnerSource string
+ imageID cloud.ImageID
+ instanceTypes map[string]arvados.InstanceType
+ syncInterval time.Duration
+ probeInterval time.Duration
+ maxProbesPerSecond int
+ maxConcurrentInstanceCreateOps int
+ timeoutIdle time.Duration
+ timeoutBooting time.Duration
+ timeoutProbe time.Duration
+ timeoutShutdown time.Duration
+ timeoutTERM time.Duration
+ timeoutSignal time.Duration
+ installPublicKey ssh.PublicKey
+ tagKeyPrefix string
// private state
subscribers map[<-chan struct{}]chan<- struct{}
runnerMD5 [md5.Size]byte
runnerCmd string
- throttleCreate throttle
- throttleInstances throttle
-
mContainersRunning prometheus.Gauge
mInstances *prometheus.GaugeVec
mInstancesPrice *prometheus.GaugeVec
}
wp.mtx.Lock()
defer wp.mtx.Unlock()
- if time.Now().Before(wp.atQuotaUntil) || wp.throttleCreate.Error() != nil {
+ if time.Now().Before(wp.atQuotaUntil) || wp.instanceSet.throttleCreate.Error() != nil {
+ return false
+ }
+ // The maxConcurrentInstanceCreateOps knob throttles the number of node create
+ // requests in flight. It was added to work around a limitation in Azure's
+ // managed disks, which support no more than 20 concurrent node creation
+ // requests from a single disk image (cf.
+ // https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image).
+ // The code assumes that, from Azure's perspective, a node creation operation
+ // lasts until the new instance appears in the "get all instances" list.
+ if wp.maxConcurrentInstanceCreateOps > 0 && len(wp.creating) >= wp.maxConcurrentInstanceCreateOps {
+ logger.Info("reached MaxConcurrentInstanceCreateOps")
+ wp.instanceSet.throttleCreate.ErrorUntil(errors.New("reached MaxConcurrentInstanceCreateOps"), time.Now().Add(5*time.Second), wp.notify)
return false
}
now := time.Now()
}
}
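For readers unfamiliar with the worker pool internals: the check above counts entries in wp.creating, i.e., Create() calls whose instances have not yet shown up in the cloud provider's instance list. A minimal standalone sketch of that bookkeeping pattern (hypothetical names, not the actual pool code) looks like this:

package sketch

import (
	"sync"
	"time"
)

// createThrottle tracks create calls that are still "in flight", i.e., not
// yet visible in the provider's "list all instances" response.
type createThrottle struct {
	mtx      sync.Mutex
	creating map[string]time.Time // instance tag -> time the create call started
	max      int                  // 0 = unlimited
}

// tryCreate reports whether another create call may start now, and records
// it if so.
func (t *createThrottle) tryCreate(tag string) bool {
	t.mtx.Lock()
	defer t.mtx.Unlock()
	if t.creating == nil {
		t.creating = map[string]time.Time{}
	}
	if t.max > 0 && len(t.creating) >= t.max {
		// Too many create calls still in flight.
		return false
	}
	t.creating[tag] = time.Now()
	return true
}

// sync is called with the tags of instances that now appear in the cloud
// provider's instance list; their create operations are no longer in flight.
func (t *createThrottle) sync(seen []string) {
	t.mtx.Lock()
	defer t.mtx.Unlock()
	for _, tag := range seen {
		delete(t.creating, tag)
	}
}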
+func (suite *PoolSuite) TestNodeCreateThrottle(c *check.C) {
+ logger := ctxlog.TestLogger(c)
+ driver := test.StubDriver{HoldCloudOps: true}
+ instanceSet, err := driver.InstanceSet(nil, "test-instance-set-id", nil, logger)
+ c.Assert(err, check.IsNil)
+
+ type1 := test.InstanceType(1)
+ pool := &Pool{
+ logger: logger,
+ instanceSet: &throttledInstanceSet{InstanceSet: instanceSet},
+ maxConcurrentInstanceCreateOps: 1,
+ instanceTypes: arvados.InstanceTypeMap{
+ type1.Name: type1,
+ },
+ }
+
+ c.Check(pool.Unallocated()[type1], check.Equals, 0)
+ res := pool.Create(type1)
+ c.Check(pool.Unallocated()[type1], check.Equals, 1)
+ c.Check(res, check.Equals, true)
+
+ res = pool.Create(type1)
+ c.Check(pool.Unallocated()[type1], check.Equals, 1)
+ c.Check(res, check.Equals, false)
+
+ pool.instanceSet.throttleCreate.err = nil
+ pool.maxConcurrentInstanceCreateOps = 2
+
+ res = pool.Create(type1)
+ c.Check(pool.Unallocated()[type1], check.Equals, 2)
+ c.Check(res, check.Equals, true)
+
+ pool.instanceSet.throttleCreate.err = nil
+ pool.maxConcurrentInstanceCreateOps = 0
+
+ res = pool.Create(type1)
+ c.Check(pool.Unallocated()[type1], check.Equals, 3)
+ c.Check(res, check.Equals, true)
+}
+
func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
logger := ctxlog.TestLogger(c)
driver := test.StubDriver{HoldCloudOps: true}
InactivePageHTML string
SSHHelpPageHTML string
SSHHelpHostSuffix string
+ IdleTimeout Duration
}
ForceLegacyAPI14 bool
type CloudVMsConfig struct {
Enable bool
- BootProbeCommand string
- DeployRunnerBinary string
- ImageID string
- MaxCloudOpsPerSecond int
- MaxProbesPerSecond int
- PollInterval Duration
- ProbeInterval Duration
- SSHPort string
- SyncInterval Duration
- TimeoutBooting Duration
- TimeoutIdle Duration
- TimeoutProbe Duration
- TimeoutShutdown Duration
- TimeoutSignal Duration
- TimeoutTERM Duration
- ResourceTags map[string]string
- TagKeyPrefix string
+ BootProbeCommand string
+ DeployRunnerBinary string
+ ImageID string
+ MaxCloudOpsPerSecond int
+ MaxProbesPerSecond int
+ MaxConcurrentInstanceCreateOps int
+ PollInterval Duration
+ ProbeInterval Duration
+ SSHPort string
+ SyncInterval Duration
+ TimeoutBooting Duration
+ TimeoutIdle Duration
+ TimeoutProbe Duration
+ TimeoutShutdown Duration
+ TimeoutSignal Duration
+ TimeoutTERM Duration
+ ResourceTags map[string]string
+ TagKeyPrefix string
Driver string
DriverParameters json.RawMessage
'future',
'google-api-python-client >=1.6.2, <1.7',
'httplib2 >=0.9.2',
- 'pycurl >=7.19.5.1',
+ 'pycurl >=7.19.5.1, <7.43.0.4', # 7.43.0.4 removes support for python2
'ruamel.yaml >=0.15.54, <=0.16.5',
'setuptools',
'ws4py >=0.4.2',
// CommonPrefixes is nil, which confuses some clients.
// Fix by using this nested struct instead.
CommonPrefixes []commonPrefix
+ // Similarly, we need omitempty here, because an empty
+ // tag confuses some clients (e.g.,
+ // github.com/aws/aws-sdk-net never terminates its
+ // paging loop).
+ NextMarker string `xml:"NextMarker,omitempty"`
}
resp := listResp{
ListResp: s3.ListResp{
c.Check(err, check.IsNil)
// HeadObject
- exists, err = bucket.Exists(prefix + "sailboat.txt")
+ resp, err := bucket.Head(prefix+"sailboat.txt", nil)
c.Check(err, check.IsNil)
- c.Check(exists, check.Equals, true)
+ c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+ c.Check(resp.ContentLength, check.Equals, int64(4))
}
func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`)
}
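As a quick standalone illustration of the omitempty change above (not part of the handler; type names are illustrative): with encoding/xml, a string field without omitempty is always serialized, so an empty marker comes out as an empty <NextMarker></NextMarker> element, which is exactly what makes clients like aws-sdk-net loop forever; with omitempty the element is dropped when the marker is empty. The new TestS3ListNoNextMarker below checks the same thing at the HTTP level.

package main

import (
	"encoding/xml"
	"fmt"
)

type withOmitempty struct {
	XMLName    xml.Name `xml:"ListBucketResult"`
	NextMarker string   `xml:"NextMarker,omitempty"`
}

type withoutOmitempty struct {
	XMLName    xml.Name `xml:"ListBucketResult"`
	NextMarker string   `xml:"NextMarker"`
}

func main() {
	a, _ := xml.Marshal(withOmitempty{})
	fmt.Println(string(a)) // <ListBucketResult></ListBucketResult>

	b, _ := xml.Marshal(withoutOmitempty{})
	fmt.Println(string(b)) // <ListBucketResult><NextMarker></NextMarker></ListBucketResult>

	c, _ := xml.Marshal(withOmitempty{NextMarker: "sailboat.txt"})
	fmt.Println(string(c)) // <ListBucketResult><NextMarker>sailboat.txt</NextMarker></ListBucketResult>
}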
+// If there is no delimiter in the request, or the results are not
+// truncated, the NextMarker XML tag should not appear in the response
+// body.
+func (s *IntegrationSuite) TestS3ListNoNextMarker(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+
+ for _, query := range []string{"prefix=e&delimiter=/", ""} {
+ req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
+ c.Assert(err, check.IsNil)
+ req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
+ req.URL.RawQuery = query
+ resp, err := http.DefaultClient.Do(req)
+ c.Assert(err, check.IsNil)
+ buf, err := ioutil.ReadAll(resp.Body)
+ c.Assert(err, check.IsNil)
+ c.Check(string(buf), check.Not(check.Matches), `(?ms).*NextMarker.*`)
+ }
+}
+
func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)