diff --git a/lower-constraints.txt b/lower-constraints.txt index 4b24158ffe..873f8c1cea 100644 --- a/lower-constraints.txt +++ b/lower-constraints.txt @@ -68,7 +68,7 @@ os-brick==2.6.1 os-client-config==1.29.0 os-resource-classes==0.1.0 os-service-types==1.2.0 -os-traits==0.12.0 +os-traits==0.15.0 os-vif==1.14.0 os-win==3.0.0 os-xenapi==0.3.3 diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 5f38d72acd..dee2baf571 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -23,6 +23,7 @@ import copy from keystoneauth1 import exceptions as ks_exc import os_resource_classes as orc +import os_traits from oslo_log import log as logging from oslo_serialization import jsonutils import retrying @@ -907,7 +908,46 @@ class ResourceTracker(object): return True return False - def _get_traits(self, nodename, provider_tree): + def _sync_compute_service_disabled_trait(self, context, traits): + """Synchronize the COMPUTE_STATUS_DISABLED trait on the node provider. + + Determines if the COMPUTE_STATUS_DISABLED trait should be added to + or removed from the provider's set of traits based on the related + nova-compute service disabled status. + + :param context: RequestContext for cell database access + :param traits: set of traits for the compute node resource provider; + this is modified by reference + """ + trait = os_traits.COMPUTE_STATUS_DISABLED + try: + service = objects.Service.get_by_compute_host(context, self.host) + if service.disabled: + # The service is disabled so make sure the trait is reported. + traits.add(trait) + else: + # The service is not disabled so do not report the trait. + traits.discard(trait) + except exception.NotFound: + # This should not happen but handle it gracefully. The scheduler + # should ignore this node if the compute service record is gone. + LOG.error('Unable to find services table record for nova-compute ' + 'host %s', self.host) + + def _get_traits(self, context, nodename, provider_tree): + """Synchronizes internal and external traits for the node provider. + + This works in conjunction with the ComptueDriver.update_provider_tree + flow and is used to synchronize traits reported by the compute driver, + traits based on information in the ComputeNode record, and traits set + externally using the placement REST API. + + :param context: RequestContext for cell database access + :param nodename: ComputeNode.hypervisor_hostname for the compute node + resource provider whose traits are being synchronized; the node + must be in the ProviderTree. + :param provider_tree: ProviderTree being updated + """ # Get the traits from the ProviderTree which will be the set # of virt-owned traits plus any externally defined traits set # on the provider that aren't owned by the virt driver. @@ -922,6 +962,8 @@ class ResourceTracker(object): elif trait in traits: traits.remove(trait) + self._sync_compute_service_disabled_trait(context, traits) + return list(traits) @retrying.retry(stop_max_attempt_number=4, @@ -977,7 +1019,10 @@ class ResourceTracker(object): # remove it. But at both t1 and t2 there is a # CUSTOM_VENDOR_TRAIT_X which we can't touch because it # was set externally on the provider. - traits = self._get_traits(nodename, provider_tree=prov_tree) + # We also want to sync the COMPUTE_STATUS_DISABLED trait based + # on the related nova-compute service's disabled status. + traits = self._get_traits( + context, nodename, provider_tree=prov_tree) prov_tree.update_traits(nodename, traits) except NotImplementedError: # update_provider_tree isn't implemented yet - try get_inventory diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 17943e0484..1852759c14 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -16,6 +16,7 @@ import datetime from keystoneauth1 import exceptions as ks_exc import mock import os_resource_classes as orc +import os_traits from oslo_config import cfg from oslo_utils.fixture import uuidsentinel as uuids from oslo_utils import timeutils @@ -1414,7 +1415,9 @@ class TestUpdateComputeNode(BaseTestCase): save_mock.assert_called_once_with() norm_mock.assert_called_once_with(mock.sentinel.inv_data, new_compute) - def test_existing_node_capabilities_as_traits(self): + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_sync_compute_service_disabled_trait') + def test_existing_node_capabilities_as_traits(self, mock_sync_disabled): """The capabilities_as_traits() driver method returns traits information for a node/provider. """ @@ -1441,9 +1444,14 @@ class TestUpdateComputeNode(BaseTestCase): new_compute.hypervisor_hostname, [mock.sentinel.trait] ) + mock_sync_disabled.assert_called_once_with( + mock.sentinel.ctx, {mock.sentinel.trait}) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_sync_compute_service_disabled_trait') @mock.patch('nova.objects.ComputeNode.save') - def test_existing_node_update_provider_tree_implemented(self, save_mock): + def test_existing_node_update_provider_tree_implemented(self, save_mock, + mock_sync_disable): """The update_provider_tree() virt driver method is only implemented for some virt drivers. This method returns inventory, trait, and aggregate information for resource providers in a tree associated with @@ -1526,10 +1534,14 @@ class TestUpdateComputeNode(BaseTestCase): # 1024MB in GB exp_inv[orc.DISK_GB]['reserved'] = 1 self.assertEqual(exp_inv, ptree.data(new_compute.uuid).inventory) + mock_sync_disable.assert_called_once() + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_sync_compute_service_disabled_trait') @mock.patch('nova.compute.resource_tracker.ResourceTracker.' '_resource_change', return_value=False) - def test_update_retry_success(self, mock_resource_change): + def test_update_retry_success(self, mock_resource_change, + mock_sync_disabled): self._setup_rt() orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone() self.rt.compute_nodes[_NODENAME] = orig_compute @@ -1550,12 +1562,16 @@ class TestUpdateComputeNode(BaseTestCase): self.rt._update(mock.sentinel.ctx, new_compute) self.assertEqual(2, ufpt_mock.call_count) + self.assertEqual(2, mock_sync_disabled.call_count) # The retry is restricted to _update_to_placement self.assertEqual(1, mock_resource_change.call_count) + @mock.patch('nova.compute.resource_tracker.ResourceTracker.' + '_sync_compute_service_disabled_trait') @mock.patch('nova.compute.resource_tracker.ResourceTracker.' '_resource_change', return_value=False) - def test_update_retry_raises(self, mock_resource_change): + def test_update_retry_raises(self, mock_resource_change, + mock_sync_disabled): self._setup_rt() orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone() self.rt.compute_nodes[_NODENAME] = orig_compute @@ -1577,9 +1593,62 @@ class TestUpdateComputeNode(BaseTestCase): self.rt._update, mock.sentinel.ctx, new_compute) self.assertEqual(4, ufpt_mock.call_count) + self.assertEqual(4, mock_sync_disabled.call_count) # The retry is restricted to _update_to_placement self.assertEqual(1, mock_resource_change.call_count) + @mock.patch('nova.objects.Service.get_by_compute_host', + return_value=objects.Service(disabled=True)) + def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host): + """Tests the scenario that the compute service is disabled so the + COMPUTE_STATUS_DISABLED trait is added to the traits set. + """ + self._setup_rt() + ctxt = context.get_admin_context() + traits = set() + self.rt._sync_compute_service_disabled_trait(ctxt, traits) + self.assertEqual({os_traits.COMPUTE_STATUS_DISABLED}, traits) + mock_get_by_host.assert_called_once_with(ctxt, self.rt.host) + + @mock.patch('nova.objects.Service.get_by_compute_host', + return_value=objects.Service(disabled=False)) + def test_sync_compute_service_disabled_trait_remove( + self, mock_get_by_host): + """Tests the scenario that the compute service is enabled so the + COMPUTE_STATUS_DISABLED trait is removed from the traits set. + """ + self._setup_rt() + ctxt = context.get_admin_context() + # First test with the trait actually in the set. + traits = {os_traits.COMPUTE_STATUS_DISABLED} + self.rt._sync_compute_service_disabled_trait(ctxt, traits) + self.assertEqual(0, len(traits)) + mock_get_by_host.assert_called_once_with(ctxt, self.rt.host) + # Now run it again with the empty set to make sure the method handles + # the trait not already being in the set (idempotency). + self.rt._sync_compute_service_disabled_trait(ctxt, traits) + self.assertEqual(0, len(traits)) + + @mock.patch('nova.objects.Service.get_by_compute_host', + # One might think Service.get_by_compute_host would raise + # ServiceNotFound but the DB API raises ComputeHostNotFound. + side_effect=exc.ComputeHostNotFound(host=_HOSTNAME)) + @mock.patch('nova.compute.resource_tracker.LOG.error') + def test_sync_compute_service_disabled_trait_service_not_found( + self, mock_log_error, mock_get_by_host): + """Tests the scenario that the compute service is not found so the + traits set is unmodified and an error is logged. + """ + self._setup_rt() + ctxt = context.get_admin_context() + traits = set() + self.rt._sync_compute_service_disabled_trait(ctxt, traits) + self.assertEqual(0, len(traits)) + mock_get_by_host.assert_called_once_with(ctxt, self.rt.host) + mock_log_error.assert_called_once() + self.assertIn('Unable to find services table record for nova-compute', + mock_log_error.call_args[0][0]) + def test_copy_resources_no_update_allocation_ratios(self): """Tests that a ComputeNode object's allocation ratio fields are not set if the configured allocation ratio values are default None. diff --git a/requirements.txt b/requirements.txt index d516768218..f5db1e343c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,7 +56,7 @@ psutil>=3.2.2 # BSD oslo.versionedobjects>=1.35.0 # Apache-2.0 os-brick>=2.6.1 # Apache-2.0 os-resource-classes>=0.1.0 # Apache-2.0 -os-traits>=0.12.0 # Apache-2.0 +os-traits>=0.15.0 # Apache-2.0 os-vif>=1.14.0 # Apache-2.0 os-win>=3.0.0 # Apache-2.0 castellan>=0.16.0 # Apache-2.0