Merge branch 'master' into 13822-nm-delayed-daemon
author Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 17 Jul 2018 14:26:29 +0000 (10:26 -0400)
committer Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 17 Jul 2018 14:27:05 +0000 (10:27 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

1  2 
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/daemon.py

index 22ffa24079b6d32ea05a6d7daa9dc3aab777f867,9e38d13eb7f4788d8af485a7e5b4b6589c9f324c..48d19f592bbdb0b87d905bac377c849000b59ef1
@@@ -35,10 -35,8 +35,10 @@@ class BaseComputeNodeDriver(RetryMixin)
          return driver_class(**auth_kwargs)
  
      @RetryMixin._retry()
 -    def _set_sizes(self):
 -        self.sizes = {sz.id: sz for sz in self.real.list_sizes()}
 +    def sizes(self):
 +        if self._sizes is None:
 +            self._sizes = {sz.id: sz for sz in self.real.list_sizes()}
 +        return self._sizes
  
      def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                   driver_class, retry_wait=1, max_retry_wait=180):
@@@ -75,7 -73,7 +75,7 @@@
                  if new_pair is not None:
                      self.create_kwargs[new_pair[0]] = new_pair[1]
  
 -        self._set_sizes()
 +        self._sizes = None
  
      def _init_ping_host(self, ping_host):
          self.ping_host = ping_host
          try:
              kwargs = self.create_kwargs.copy()
              kwargs.update(self.arvados_create_kwargs(size, arvados_node))
-             kwargs['size'] = size
+             kwargs['size'] = size.real
              return self.real.create_node(**kwargs)
          except CLOUD_ERRORS as create_error:
              # Workaround for bug #6702: sometimes the create node request
index aa8f3c769557a029f380f59609495ca422a7f7af,ae554327ca20d929a92b595da54e32ba05e6485f..719124d4000f724a271077d9f1614c50c6788f8d
@@@ -46,6 -46,8 +46,8 @@@ class ComputeNodeDriver(BaseComputeNode
  
      def arvados_create_kwargs(self, size, arvados_node):
          tags = {
+             # Set up tag indicating the Arvados assigned Cloud Size id.
+             'arvados_node_size': size.id,
              'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
              'arv-ping-url': self._make_ping_url(arvados_node)
          }
@@@ -83,11 -85,12 +85,12 @@@ echo %s > /var/tmp/arv-node-data/meta-d
          # Do our own filtering based on tag.
          nodes = [node for node in
                  super(ComputeNodeDriver, self).list_nodes(ex_fetch_nic=False, ex_fetch_power_state=False)
-                 if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
+                 if node.extra.get("tags", {}).get("arvados-class") == self.tags["arvados-class"]]
          for n in nodes:
              # Need to populate Node.size
              if not n.size:
 -                n.size = self.sizes[n.extra["properties"]["hardwareProfile"]["vmSize"]]
 +                n.size = self.sizes()[n.extra["properties"]["hardwareProfile"]["vmSize"]]
+             n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size')
          return nodes
  
      def broken(self, cloud_node):
index 07ed90dfa517cc656f75886ff2dedde19761566f,2b1564279717d0d0159bd3c07b307b4b5675c98f..56812d258a92212b02a53d9775534d8b23b50b69
@@@ -91,18 -91,27 +91,27 @@@ class ComputeNodeDriver(BaseComputeNode
                      "VolumeSize": volsize,
                      "VolumeType": "gp2"
                  }}]
+         if size.preemptible:
+             # Request a Spot instance for this node
+             kw['ex_spot_market'] = True
          return kw
  
      def sync_node(self, cloud_node, arvados_node):
          self.real.ex_create_tags(cloud_node,
                                   {'Name': arvados_node_fqdn(arvados_node)})
  
+     def create_node(self, size, arvados_node):
+         # Set up tag indicating the Arvados assigned Cloud Size id.
+         self.create_kwargs['ex_metadata'].update({'arvados_node_size': size.id})
+         return super(ComputeNodeDriver, self).create_node(size, arvados_node)
      def list_nodes(self):
          # Need to populate Node.size
          nodes = super(ComputeNodeDriver, self).list_nodes()
          for n in nodes:
              if not n.size:
 -                n.size = self.sizes[n.extra["instance_type"]]
 +                n.size = self.sizes()[n.extra["instance_type"]]
+             n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size')
          return nodes
  
      @classmethod
index f1238db40321601abe42dee33824983de34d1044,be39ecba6bf4b3cfb4ef6e0e5dd7c1168dc86ddd..11025f7840bc00fe6c188ad6b0f9e9bea1795cba
@@@ -38,6 -38,7 +38,6 @@@ class ComputeNodeDriver(BaseComputeNode
          super(ComputeNodeDriver, self).__init__(
              auth_kwargs, list_kwargs, create_kwargs,
              driver_class)
 -        self._sizes_by_id = {sz.id: sz for sz in self.sizes.itervalues()}
          self._disktype_links = {dt.name: self._object_link(dt)
                                  for dt in self.real.ex_list_disktypes()}
  
                    'ex_disks_gce_struct': disks,
                    }
          result['ex_metadata'].update({
-                 'arv-ping-url': self._make_ping_url(arvados_node),
-                 'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
-                 'hostname': arvados_node_fqdn(arvados_node),
-                 })
+             'arvados_node_size': size.id,
+             'arv-ping-url': self._make_ping_url(arvados_node),
+             'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+             'hostname': arvados_node_fqdn(arvados_node),
+         })
          return result
  
      def list_nodes(self):
          # The GCE libcloud driver only supports filtering node lists by zone.
          # Do our own filtering based on tag list.
          nodelist = [node for node in
                      super(ComputeNodeDriver, self).list_nodes()
                      if self.node_tags.issubset(node.extra.get('tags', []))]
-         # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
-         # It's supposed to be the actual size object.  Check that it's not,
-         # and monkeypatch the results when that's the case.
-         if nodelist and not hasattr(nodelist[0].size, 'id'):
-             for node in nodelist:
+         for node in nodelist:
+             # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
+             # It's supposed to be the actual size object.  Check that it's not,
+             # and monkeypatch the results when that's the case.
+             if not hasattr(node.size, 'id'):
 -                node.size = self._sizes_by_id[node.size]
 +                node.size = self.sizes()[node.size]
+             # Get arvados-assigned cloud size id
+             node.extra['arvados_node_size'] = node.extra.get('metadata', {}).get('arvados_node_size')
          return nodelist
  
      @classmethod
index 0d6fdfca9a6c057c18ee6d4ea63e4cdade539a9b,911798e08f937ded2d10e30b8b8fe7d64edd8f6b..6e85b85ab2c8277aae88d27ff0ad96917226af98
@@@ -318,7 -318,7 +318,7 @@@ class NodeManagerDaemonActor(actor_clas
          busy_count = counts["busy"]
          wishlist_count = self._size_wishlist(size)
  
-         self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.name,
+         self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.id,
                            wishlist_count,
                            up_count,
                            counts["booting"],
              can_boot = int((self.max_total_price - total_price) / size.price)
              if can_boot == 0:
                  self._logger.info("Not booting %s (price %s) because with it would exceed max_total_price of %s (current total_price is %s)",
-                                   size.name, size.price, self.max_total_price, total_price)
+                                   size.id, size.price, self.max_total_price, total_price)
              return can_boot
          else:
              return wanted
              return None
          arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
          self._logger.info("Want %i more %s nodes.  Booting a node.",
-                           nodes_wanted, cloud_size.name)
+                           nodes_wanted, cloud_size.id)
          new_setup = self._node_setup.start(
              timer_actor=self._timer,
              arvados_client=self._new_arvados(),
              arvados_node=arvados_node,
              cloud_client=self._new_cloud(),
 -            cloud_size=self.server_calculator.find_size(cloud_size.id)).proxy()
 -        self.booting[new_setup.actor_ref.actor_urn] = new_setup
 -        self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size
 +            cloud_size=self.server_calculator.find_size(cloud_size.id))
 +        self.booting[new_setup.actor_urn] = new_setup.proxy()
 +        self.sizes_booting[new_setup.actor_urn] = cloud_size
  
          if arvados_node is not None:
              self.arvados_nodes[arvados_node['uuid']].assignment_time = (
                  time.time())
 -        new_setup.subscribe(self._later.node_setup_finished)
 +        new_setup.tell_proxy().subscribe(self._later.node_setup_finished)
          if nodes_wanted > 1:
              self._later.start_node(cloud_size)