diff --git a/doc/source/admin/configuration/schedulers.rst b/doc/source/admin/configuration/schedulers.rst index 1ac53bfe89..538919d706 100644 --- a/doc/source/admin/configuration/schedulers.rst +++ b/doc/source/admin/configuration/schedulers.rst @@ -1191,6 +1191,14 @@ set`. Some scheduling filter operations can be performed by placement for increased speed and efficiency. +.. note:: + + The nova-api service attempts (as of nova 18.0.0) to automatically mirror + the association of a compute host with an aggregate when an administrator + adds or removes a host to/from a nova host aggregate. This should alleviate + the need to manually create those association records in the placement API + using the ``openstack resource provider aggregate set`` CLI invocation. + Tenant Isolation with Placement ------------------------------- diff --git a/doc/source/user/placement.rst b/doc/source/user/placement.rst index a865d11afa..8d904d771f 100644 --- a/doc/source/user/placement.rst +++ b/doc/source/user/placement.rst @@ -293,10 +293,11 @@ Rocky (18.0.0) that service. This is because the ``nova-api`` service now needs to talk to the placement service in order to (1) delete resource provider allocations when deleting an instance and the ``nova-compute`` service on which that - instance is running is down and (2) delete a ``nova-compute`` service record - via the ``DELETE /os-services/{service_id}`` API. This change is idempotent - if ``[placement]`` is not configured in ``nova-api`` but it will result in - new warnings in the logs until configured. + instance is running is down, (2) delete a ``nova-compute`` service record + via the ``DELETE /os-services/{service_id}`` API, and (3) mirror aggregate + host associations to the placement service. This change is idempotent if + ``[placement]`` is not configured in ``nova-api``, but it will result in + new warnings in the logs until configured. REST API ======== diff --git a/nova/compute/api.py b/nova/compute/api.py index b71b9bddc9..f6217e4a83 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -4961,6 +4961,7 @@ class AggregateAPI(base.Base): def __init__(self, **kwargs): self.compute_rpcapi = compute_rpcapi.ComputeAPI() self.scheduler_client = scheduler_client.SchedulerClient() + self.placement_client = self.scheduler_client.reportclient super(AggregateAPI, self).__init__(**kwargs) @wrap_exception() @@ -5142,6 +5143,32 @@ class AggregateAPI(base.Base): aggregate.add_host(host_name) self.scheduler_client.update_aggregates(context, [aggregate]) + try: + self.placement_client.aggregate_add_host( + context, aggregate.uuid, host_name) + except exception.PlacementAPIConnectFailure: + # NOTE(jaypipes): Rocky should be able to tolerate the nova-api + # service not communicating with the Placement API, so just log a + # warning here. + # TODO(jaypipes): Remove this in Stein, when placement must be able + # to be contacted from the nova-api service. + LOG.warning("Failed to associate %s with a placement " + "aggregate: %s. There was a failure to communicate " + "with the placement service.", + host_name, aggregate.uuid) + except (exception.ResourceProviderNotFound, + exception.ResourceProviderAggregateRetrievalFailed, + exception.ResourceProviderUpdateFailed) as err: + # NOTE(jaypipes): We don't want a failure to perform the mirroring + # action in the placement service to be returned to the user (they + # probably don't know anything about the placement service and + # would just be confused). 
So, we just log a warning here, noting + # that on the next run of nova-manage placement sync_aggregates + # things will go back to normal. + LOG.warning("Failed to associate %s with a placement " + "aggregate: %s. This may be corrected after running " + "nova-manage placement sync_aggregates.", + host_name, err) self._update_az_cache_for_host(context, host_name, aggregate.metadata) # NOTE(jogo): Send message to host to support resource pools self.compute_rpcapi.add_aggregate_host(context, @@ -5181,6 +5208,32 @@ class AggregateAPI(base.Base): aggregate.delete_host(host_name) self.scheduler_client.update_aggregates(context, [aggregate]) + try: + self.placement_client.aggregate_remove_host( + context, aggregate.uuid, host_name) + except exception.PlacementAPIConnectFailure: + # NOTE(jaypipes): Rocky should be able to tolerate the nova-api + # service not communicating with the Placement API, so just log a + # warning here. + # TODO(jaypipes): Remove this in Stein, when placement must be able + # to be contacted from the nova-api service. + LOG.warning("Failed to remove association of %s with a placement " + "aggregate: %s. There was a failure to communicate " + "with the placement service.", + host_name, aggregate.uuid) + except (exception.ResourceProviderNotFound, + exception.ResourceProviderAggregateRetrievalFailed, + exception.ResourceProviderUpdateFailed) as err: + # NOTE(jaypipes): We don't want a failure to perform the mirroring + # action in the placement service to be returned to the user (they + # probably don't know anything about the placement service and + # would just be confused). So, we just log a warning here, noting + # that on the next run of nova-manage placement sync_aggregates + # things will go back to normal. + LOG.warning("Failed to remove association of %s with a placement " + "aggregate: %s. This may be corrected after running " + "nova-manage placement sync_aggregates.", + host_name, err) self._update_az_cache_for_host(context, host_name, aggregate.metadata) self.compute_rpcapi.remove_aggregate_host(context, aggregate=aggregate, host_param=host_name, host=host_name) diff --git a/nova/exception.py b/nova/exception.py index b6365b36bd..15052fd9b2 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2144,11 +2144,19 @@ class ResourceProviderUpdateFailed(NovaException): "%(error)s") +class ResourceProviderNotFound(NotFound): + msg_fmt = _("No such resource provider %(name_or_uuid)s.") + + class ResourceProviderSyncFailed(NovaException): msg_fmt = _("Failed to synchronize the placement service with resource " "provider information supplied by the compute host.") +class PlacementAPIConnectFailure(NovaException): + msg_fmt = _("Unable to communicate with the Placement API.") + + class PlacementAPIConflict(NovaException): """Any 409 error from placement APIs should use (a subclass of) this exception. diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 8d2b6e34a6..e4439bc6be 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -1243,7 +1243,8 @@ class SchedulerReportClient(object): raise exception.ResourceProviderUpdateFailed(url=url, error=resp.text) @safe_connect - def set_aggregates_for_provider(self, context, rp_uuid, aggregates): + def set_aggregates_for_provider(self, context, rp_uuid, aggregates, + use_cache=True): """Replace a provider's aggregates with those specified. The provider must exist - this method does not attempt to create it. 
@@ -1252,6 +1253,8 @@ class SchedulerReportClient(object): :param rp_uuid: The UUID of the provider whose aggregates are to be updated. :param aggregates: Iterable of aggregates to set on the provider. + :param use_cache: If False, the local cache of resource providers is + not updated. :raises: ResourceProviderUpdateFailed on any placement API failure. """ # TODO(efried): Handle generation conflicts when supported by placement @@ -1262,7 +1265,8 @@ class SchedulerReportClient(object): if resp.status_code == 200: placement_aggs = resp.json()['aggregates'] - self._provider_tree.update_aggregates(rp_uuid, placement_aggs) + if use_cache: + self._provider_tree.update_aggregates(rp_uuid, placement_aggs) return # Some error occurred; log it @@ -1868,3 +1872,124 @@ class SchedulerReportClient(object): # TODO(efried): Raise these. Right now this is being left a no-op # for backward compatibility. pass + + @safe_connect + def _get_provider_by_name(self, context, name): + """Queries the placement API for resource provider information matching + a supplied name. + + :param context: The security context + :param name: Name of the resource provider to look up + :return: A dict of resource provider information including the + provider's UUID and generation + :raises: `exception.ResourceProviderNotFound` when no such provider was + found + """ + resp = self.get("/resource_providers?name=%s" % name, + global_request_id=context.global_id) + if resp.status_code == 200: + data = resp.json() + records = data['resource_providers'] + num_recs = len(records) + if num_recs == 1: + return records[0] + elif num_recs > 1: + msg = ("Found multiple resource provider records for resource " + "provider name %(rp_name)s: %(rp_uuids)s. " + "This should not happen.") + LOG.warning(msg, { + 'rp_name': name, + 'rp_uuids': ','.join([r['uuid'] for r in records]) + }) + elif resp.status_code != 404: + msg = ("Failed to retrieve resource provider information by name " + "for resource provider %s. Got %d: %s") + LOG.warning(msg, name, resp.status_code, resp.text) + + raise exception.ResourceProviderNotFound(name_or_uuid=name) + + def aggregate_add_host(self, context, agg_uuid, host_name): + """Looks up a resource provider by the supplied host name, and adds the + aggregate with supplied UUID to that resource provider. + + :note: This method does NOT use the cached provider tree. It is only + called from the Compute API when a nova host aggregate is + modified. + + :param context: The security context + :param agg_uuid: UUID of the aggregate being modified + :param host_name: Name of the nova-compute service worker to look up a + resource provider for + :raises: `exception.ResourceProviderNotFound` if no resource provider + matching the host name could be found from the placement API + :raises: `exception.ResourceProviderAggregateRetrievalFailed` when + failing to get a provider's existing aggregates + :raises: `exception.ResourceProviderUpdateFailed` if there was a + failure attempting to save the provider aggregates + """ + rp = self._get_provider_by_name(context, host_name) + # NOTE(jaypipes): Unfortunately, due to @safe_connect, + # _get_provider_by_name() can return None. If that happens, raise an + # error so we can trap for it in the Nova API code and ignore in Rocky, + # blow up in Stein. + if rp is None: + raise exception.PlacementAPIConnectFailure() + rp_uuid = rp['uuid'] + + # Now attempt to add the aggregate to the resource provider. 
We don't + # want to overwrite any other aggregates the provider may be associated + # with, however, so we first grab the list of aggregates for this + # provider and add the aggregate to the list of aggregates it already + # has. + existing_aggs = self._get_provider_aggregates(context, rp_uuid) + if agg_uuid in existing_aggs: + return + + new_aggs = existing_aggs | set([agg_uuid]) + # TODO(jaypipes): Send provider generation (which is in the rp dict) + # along to set_aggregates_for_provider() + self.set_aggregates_for_provider( + context, rp_uuid, new_aggs, use_cache=False) + + def aggregate_remove_host(self, context, agg_uuid, host_name): + """Looks up a resource provider by the supplied host name, and removes + the aggregate with supplied UUID from that resource provider. + + :note: This method does NOT use the cached provider tree. It is only + called from the Compute API when a nova host aggregate is + modified. + + :param context: The security context + :param agg_uuid: UUID of the aggregate being modified + :param host_name: Name of the nova-compute service worker to look up a + resource provider for + :raises: `exception.ResourceProviderNotFound` if no resource provider + matching the host name could be found from the placement API + :raises: `exception.ResourceProviderAggregateRetrievalFailed` when + failing to get a provider's existing aggregates + :raises: `exception.ResourceProviderUpdateFailed` if there was a + failure attempting to save the provider aggregates + """ + rp = self._get_provider_by_name(context, host_name) + # NOTE(jaypipes): Unfortunately, due to @safe_connect, + # _get_provider_by_name() can return None. If that happens, raise an + # error so we can trap for it in the Nova API code and ignore in Rocky, + # blow up in Stein. + if rp is None: + raise exception.PlacementAPIConnectFailure() + rp_uuid = rp['uuid'] + + # Now attempt to remove the aggregate from the resource provider. 
We + # don't want to overwrite any other aggregates the provider may be + # associated with, however, so we first grab the list of aggregates for + # this provider and remove the aggregate from the list of aggregates it + # already has. + existing_aggs = self._get_provider_aggregates(context, rp_uuid) + if agg_uuid not in existing_aggs: + return + + new_aggs = existing_aggs - set([agg_uuid]) + # TODO(jaypipes): Send provider generation (which is in the rp dict) + # along to set_aggregates_for_provider() + self.set_aggregates_for_provider( + context, rp_uuid, new_aggs, use_cache=False) diff --git a/nova/tests/functional/test_aggregates.py b/nova/tests/functional/test_aggregates.py index f3e76cb3cc..9c354ba97f 100644 --- a/nova/tests/functional/test_aggregates.py +++ b/nova/tests/functional/test_aggregates.py @@ -141,20 +141,11 @@ class AggregateRequestFiltersTest(test.TestCase, host_uuid = self._get_provider_uuid_by_host(host) - # Get the existing aggregates for this host in placement and add the - # new one to it - aggs = self.report_client.get( - '/resource_providers/%s/aggregates' % host_uuid, - version='1.1').json() - placement_aggs = aggs['aggregates'] - placement_aggs.append(agg['uuid']) - # Make sure we have a view of the provider we're about to mess with # FIXME(efried): This should be a thing we can do without internals - self.report_client._ensure_resource_provider(self.context, host_uuid) - - self.report_client.set_aggregates_for_provider(self.context, host_uuid, - placement_aggs) + self.report_client._ensure_resource_provider( + self.context, host_uuid, name=host) + self.report_client.aggregate_add_host(self.context, agg['uuid'], host) def _wait_for_state_change(self, server, from_status): for i in range(0, 50): diff --git a/nova/tests/functional/test_report_client.py b/nova/tests/functional/test_report_client.py index 2db4f16cc8..e434c2a6ca 100644 --- a/nova/tests/functional/test_report_client.py +++ b/nova/tests/functional/test_report_client.py @@ -898,3 +898,55 @@ class SchedulerReportClientTests(test.TestCase): uuids.ssp): resp = self.client.get('/resource_providers/%s' % uuid) self.assertEqual(404, resp.status_code) + + @mock.patch('nova.compute.provider_tree.ProviderTree.update_aggregates') + def test_non_tree_aggregate_membership(self, upd_aggs_mock): + """There are some methods of the reportclient that do NOT interact with + the reportclient's provider_tree cache of information. These methods + are called to add and remove members from a nova host aggregate and + ensure that the placement API has a mirrored record of the resource + provider's aggregate associations. 
+ """ + agg_uuid = uuids.agg + with self._interceptor(): + # get_provider_tree_and_ensure_root creates a resource provider + # record for us + ptree = self.client.get_provider_tree_and_ensure_root( + self.context, self.compute_uuid, name=self.compute_name) + self.assertEqual([self.compute_uuid], ptree.get_provider_uuids()) + + # Use the reportclient's _get_provider_aggregates() private method + # to verify no aggregates are yet associated with this provider + aggs = self.client._get_provider_aggregates( + self.context, self.compute_uuid) + self.assertEqual(set(), aggs) + + # Now associate the compute **host name** with an aggregate and + # ensure the aggregate association is saved properly + self.client.aggregate_add_host( + self.context, agg_uuid, self.compute_name) + + # Check that the ProviderTree cache that was populated above during + # get_provider_tree_and_ensure_root() hasn't been modified (since + # the aggregate_add_host() method is only called from nova-api and + # we don't want to have a ProviderTree cache at that layer. + cache_data = self.client._provider_tree.data(self.compute_uuid) + self.assertNotIn(agg_uuid, cache_data.aggregates) + aggs = self.client._get_provider_aggregates( + self.context, self.compute_uuid) + self.assertEqual(set([agg_uuid]), aggs) + + # Finally, remove the association and verify it's removed in + # placement + self.client.aggregate_remove_host( + self.context, agg_uuid, self.compute_name) + cache_data = self.client._provider_tree.data(self.compute_uuid) + self.assertNotIn(agg_uuid, cache_data.aggregates) + aggs = self.client._get_provider_aggregates( + self.context, self.compute_uuid) + self.assertEqual(set(), aggs) + + # Try removing the same host and verify no error + self.client.aggregate_remove_host( + self.context, agg_uuid, self.compute_name) + upd_aggs_mock.assert_not_called() diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index a4a72ee9b2..649dcc2406 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -11556,7 +11556,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.AggregateNotFound, self.api.delete_aggregate, self.context, aggr.id) - def test_check_az_for_aggregate(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_check_az_for_aggregate(self, mock_add_host): # Ensure all conflict hosts can be returned values = _create_service_entries(self.context) fake_zone = values[0][0] @@ -11589,7 +11591,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertEqual(msg.event_type, 'aggregate.updateprop.end') - def test_update_aggregate_no_az(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_update_aggregate_no_az(self, mock_add_host): # Ensure metadata without availability zone can be # updated,even the aggregate contains hosts belong # to another availability zone @@ -11611,7 +11615,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertEqual(msg.event_type, 'aggregate.updateprop.end') - def test_update_aggregate_az_change(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ 'aggregate_add_host') + def test_update_aggregate_az_change(self, mock_add_host): # Ensure availability zone can be updated, # when the aggregate is the only one with # availability zone @@ -11633,7 +11639,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertEqual(msg.event_type, 'aggregate.updatemetadata.end') - def test_update_aggregate_az_fails(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_update_aggregate_az_fails(self, mock_add_host): # Ensure aggregate's availability zone can't be updated, # when aggregate has hosts in other availability zone fake_notifier.NOTIFICATIONS = [] @@ -11666,7 +11674,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.api.update_aggregate, self.context, aggr4.id, metadata) - def test_update_aggregate_az_fails_with_nova_az(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_update_aggregate_az_fails_with_nova_az(self, mock_add_host): # Ensure aggregate's availability zone can't be updated, # when aggregate has hosts in other availability zone fake_notifier.NOTIFICATIONS = [] @@ -11725,8 +11735,10 @@ class ComputeAPIAggrTestCase(BaseTestCase): matchers.DictMatches({'availability_zone': 'fake_zone', 'foo_key2': 'foo_value2'})) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch('nova.compute.utils.notify_about_aggregate_action') - def test_update_aggregate_metadata_no_az(self, mock_notify): + def test_update_aggregate_metadata_no_az(self, mock_notify, mock_add_host): # Ensure metadata without availability zone can be # updated,even the aggregate contains hosts belong # to another availability zone @@ -11756,7 +11768,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertThat(aggr2.metadata, matchers.DictMatches({'foo_key2': 'foo_value3'})) - def test_update_aggregate_metadata_az_change(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_update_aggregate_metadata_az_change(self, mock_add_host): # Ensure availability zone can be updated, # when the aggregate is the only one with # availability zone @@ -11794,7 +11808,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertThat(aggr.metadata, matchers.DictMatches( {'availability_zone': 'new_fake_zone', 'foo_key1': 'foo_value1'})) - def test_update_aggregate_metadata_az_fails(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_update_aggregate_metadata_az_fails(self, mock_add_host): # Ensure aggregate's availability zone can't be updated, # when aggregate has hosts in other availability zone fake_notifier.NOTIFICATIONS = [] @@ -11873,7 +11889,11 @@ class ComputeAPIAggrTestCase(BaseTestCase): mock.call(context=self.context, aggregate=AggregateIdMatcher(aggr), action='delete', phase='end')]) - def test_delete_non_empty_aggregate(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_delete_non_empty_aggregate(self, mock_add_host, mock_remove_host): # Ensure InvalidAggregateAction is raised when non empty aggregate. 
_create_service_entries(self.context, [['fake_availability_zone', ['fake_host']]]) @@ -11882,11 +11902,14 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.api.add_host_to_aggregate(self.context, aggr.id, 'fake_host') self.assertRaises(exception.InvalidAggregateActionDelete, self.api.delete_aggregate, self.context, aggr.id) + mock_remove_host.assert_not_called() + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch('nova.compute.utils.notify_about_aggregate_action') @mock.patch.object(availability_zones, 'update_host_availability_zone_cache') - def test_add_host_to_aggregate(self, mock_az, mock_notify): + def test_add_host_to_aggregate(self, mock_az, mock_notify, mock_add_host): # Ensure we can add a host to an aggregate. values = _create_service_entries(self.context) fake_zone = values[0][0] @@ -11919,8 +11942,12 @@ class ComputeAPIAggrTestCase(BaseTestCase): action='add_host', phase='start'), mock.call(context=self.context, aggregate=aggr, action='add_host', phase='end')]) + mock_add_host.assert_called_once_with( + self.context, aggr.uuid, fake_host) - def test_add_host_to_aggr_with_no_az(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_add_host_to_aggr_with_no_az(self, mock_add_host): values = _create_service_entries(self.context) fake_zone = values[0][0] fake_host = values[0][1][0] @@ -11936,7 +11963,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertIn(fake_host, aggr.hosts) self.assertIn(fake_host, aggr_no_az.hosts) - def test_add_host_to_multi_az(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_add_host_to_multi_az(self, mock_add_host): # Ensure we can't add a host to different availability zone values = _create_service_entries(self.context) fake_zone = values[0][0] @@ -11952,8 +11981,11 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.InvalidAggregateActionAdd, self.api.add_host_to_aggregate, self.context, aggr2.id, fake_host) + self.assertEqual(1, mock_add_host.call_count) - def test_add_host_to_multi_az_with_nova_agg(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_add_host_to_multi_az_with_nova_agg(self, mock_add_host): # Ensure we can't add a host if already existing in an agg with AZ set # to default values = _create_service_entries(self.context) @@ -11970,8 +12002,11 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.InvalidAggregateActionAdd, self.api.add_host_to_aggregate, self.context, aggr2.id, fake_host) + self.assertEqual(1, mock_add_host.call_count) - def test_add_host_to_aggregate_multiple(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_add_host_to_aggregate_multiple(self, mock_add_host): # Ensure we can add multiple hosts to an aggregate. values = _create_service_entries(self.context) fake_zone = values[0][0] @@ -11981,8 +12016,11 @@ class ComputeAPIAggrTestCase(BaseTestCase): aggr = self.api.add_host_to_aggregate(self.context, aggr.id, host) self.assertEqual(len(aggr.hosts), len(values[0][1])) + self.assertEqual(len(aggr.hosts), mock_add_host.call_count) - def test_add_host_to_aggregate_raise_not_found(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ 'aggregate_add_host') + def test_add_host_to_aggregate_raise_not_found(self, mock_add_host): # Ensure ComputeHostNotFound is raised when adding invalid host. aggr = self.api.create_aggregate(self.context, 'fake_aggregate', 'fake_zone') @@ -11993,11 +12031,14 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertEqual(len(fake_notifier.NOTIFICATIONS), 2) self.assertEqual(fake_notifier.NOTIFICATIONS[1].publisher_id, 'compute.fake-mini') + mock_add_host.assert_not_called() + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch('nova.objects.HostMapping.get_by_host') @mock.patch('nova.context.set_target_cell') def test_add_host_to_aggregate_raise_cn_not_found(self, mock_st, - mock_hm): + mock_hm, mock_add_host): # Ensure ComputeHostNotFound is raised when adding invalid host. aggr = self.api.create_aggregate(self.context, 'fake_aggregate', 'fake_zone') @@ -12005,11 +12046,17 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.ComputeHostNotFound, self.api.add_host_to_aggregate, self.context, aggr.id, 'invalid_host') + mock_add_host.assert_not_called() + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch('nova.compute.utils.notify_about_aggregate_action') @mock.patch.object(availability_zones, 'update_host_availability_zone_cache') - def test_remove_host_from_aggregate_active(self, mock_az, mock_notify): + def test_remove_host_from_aggregate_active( + self, mock_az, mock_notify, mock_add_host, mock_remove_host): # Ensure we can remove a host from an aggregate. values = _create_service_entries(self.context) fake_zone = values[0][0] @@ -12047,8 +12094,13 @@ class ComputeAPIAggrTestCase(BaseTestCase): action='remove_host', phase='start'), mock.call(context=self.context, aggregate=expected, action='remove_host', phase='end')]) + mock_remove_host.assert_called_once_with( + self.context, aggr.uuid, host_to_remove) - def test_remove_host_from_aggregate_raise_not_found(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + def test_remove_host_from_aggregate_raise_not_found( + self, mock_remove_host): # Ensure HostMappingNotFound is raised when removing invalid host. _create_service_entries(self.context, [['fake_zone', ['fake_host']]]) aggr = self.api.create_aggregate(self.context, 'fake_aggregate', @@ -12056,12 +12108,16 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.HostMappingNotFound, self.api.remove_host_from_aggregate, self.context, aggr.id, 'invalid_host') + mock_remove_host.assert_not_called() + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') @mock.patch('nova.objects.HostMapping.get_by_host') @mock.patch('nova.context.set_target_cell') def test_remove_host_from_aggregate_raise_cn_not_found(self, mock_st, - mock_hm): + mock_hm, + mock_remove_host): # Ensure ComputeHostNotFound is raised when removing invalid host. 
_create_service_entries(self.context, [['fake_zone', ['fake_host']]]) aggr = self.api.create_aggregate(self.context, 'fake_aggregate', @@ -12069,6 +12125,7 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertRaises(exception.ComputeHostNotFound, self.api.remove_host_from_aggregate, self.context, aggr.id, 'invalid_host') + mock_remove_host.assert_not_called() def test_aggregate_list(self): aggregate = self.api.create_aggregate(self.context, @@ -12100,7 +12157,9 @@ class ComputeAPIAggrTestCase(BaseTestCase): self.assertEqual('foo_value1', test_agg_meta['foo_key1']) self.assertEqual('foo_value2', test_agg_meta['foo_key2']) - def test_aggregate_list_with_hosts(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_aggregate_list_with_hosts(self, mock_add_host): values = _create_service_entries(self.context) fake_zone = values[0][0] host_aggregate = self.api.create_aggregate(self.context, @@ -12161,14 +12220,16 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase): self.api.delete_aggregate(self.context, 1) delete_aggregate.assert_called_once_with(self.context, agg) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch('nova.compute.utils.notify_about_aggregate_action') @mock.patch('nova.compute.rpcapi.ComputeAPI.add_aggregate_host') @mock.patch.object(scheduler_client.SchedulerClient, 'update_aggregates') def test_add_host_to_aggregate(self, update_aggregates, mock_add_agg, - mock_notify): + mock_notify, mock_add_host): self.api.is_safe_to_update_az = mock.Mock() self.api._update_az_cache_for_host = mock.Mock() - agg = objects.Aggregate(name='fake', metadata={}) + agg = objects.Aggregate(name='fake', metadata={}, uuid=uuids.agg) agg.add_host = mock.Mock() with test.nested( mock.patch.object(objects.Service, 'get_by_compute_host'), @@ -12179,14 +12240,19 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase): mock_add_agg.assert_called_once_with(self.context, aggregate=agg, host_param='fakehost', host='fakehost') + mock_add_host.assert_called_once_with( + self.context, agg.uuid, 'fakehost') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ 'aggregate_remove_host') @mock.patch('nova.compute.utils.notify_about_aggregate_action') @mock.patch('nova.compute.rpcapi.ComputeAPI.remove_aggregate_host') @mock.patch.object(scheduler_client.SchedulerClient, 'update_aggregates') def test_remove_host_from_aggregate(self, update_aggregates, - mock_remove_agg, mock_notify): + mock_remove_agg, mock_notify, + mock_remove_host): self.api._update_az_cache_for_host = mock.Mock() - agg = objects.Aggregate(name='fake', metadata={}) + agg = objects.Aggregate(name='fake', metadata={}, uuid=uuids.agg) agg.delete_host = mock.Mock() with test.nested( mock.patch.object(objects.Service, 'get_by_compute_host'), @@ -12202,6 +12268,8 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase): action='remove_host', phase='start'), mock.call(context=self.context, aggregate=agg, action='remove_host', phase='end')]) + mock_remove_host.assert_called_once_with( + self.context, agg.uuid, 'fakehost') class ComputeAggrTestCase(BaseTestCase): diff --git a/nova/tests/unit/compute/test_host_api.py b/nova/tests/unit/compute/test_host_api.py index fbc944d959..77bf431ec2 100644 --- a/nova/tests/unit/compute/test_host_api.py +++ b/nova/tests/unit/compute/test_host_api.py @@ -405,9 +405,14 @@ class ComputeHostAPITestCase(test.TestCase): self.assertFalse(service2.destroy.called) self.assertFalse(set_target.called) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') @mock.patch.object(objects.ComputeNodeList, 'get_all_by_host') @mock.patch.object(objects.HostMapping, 'get_by_host') - def test_service_delete_compute_in_aggregate(self, mock_hm, mock_get_cn): + def test_service_delete_compute_in_aggregate( + self, mock_hm, mock_get_cn, mock_add_host, mock_remove_host): compute = self.host_api.db.service_create(self.ctxt, {'host': 'fake-compute-host', 'binary': 'nova-compute', @@ -423,11 +428,15 @@ self.aggregate_api.add_host_to_aggregate(self.ctxt, aggregate.id, 'fake-compute-host') + mock_add_host.assert_called_once_with( + mock.ANY, aggregate.uuid, 'fake-compute-host') self.controller.delete(self.req, compute.id) result = self.aggregate_api.get_aggregate(self.ctxt, aggregate.id).hosts self.assertEqual([], result) mock_hm.return_value.destroy.assert_called_once_with() + mock_remove_host.assert_called_once_with( + mock.ANY, aggregate.uuid, 'fake-compute-host') @mock.patch('nova.db.compute_node_statistics') def test_compute_node_statistics(self, mock_cns): @@ -681,3 +690,95 @@ class ComputeHostAPICellsTestCase(ComputeHostAPITestCase): self.assertRaises(exception.ComputeHostNotFound, self.host_api.compute_node_get, self.ctxt, cell_compute_uuid) + + +class ComputeAggregateAPITestCase(test.TestCase): + def setUp(self): + super(ComputeAggregateAPITestCase, self).setUp() + self.aggregate_api = compute_api.AggregateAPI() + self.ctxt = context.get_admin_context() + # NOTE(jaypipes): We just mock out the HostMapping and Service object + # lookups in order to bypass the code that does cell lookup stuff, + # which isn't germane to these tests. + self.useFixture( + fixtures.MockPatch('nova.objects.HostMapping.get_by_host')) + self.useFixture( + fixtures.MockPatch('nova.context.set_target_cell')) + self.useFixture( + fixtures.MockPatch('nova.objects.Service.get_by_compute_host')) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ 'aggregate_add_host') + @mock.patch.object(compute_api.LOG, 'warning') + def test_aggregate_add_host_placement_missing_provider( + self, mock_log, mock_pc_add_host): + hostname = 'fake-host' + err = exception.ResourceProviderNotFound(name_or_uuid=hostname) + mock_pc_add_host.side_effect = err + aggregate = self.aggregate_api.create_aggregate( + self.ctxt, 'aggregate', None) + self.aggregate_api.add_host_to_aggregate( + self.ctxt, aggregate.id, hostname) + # Nothing should blow up in Rocky, but we should get a warning + msg = ("Failed to associate %s with a placement " + "aggregate: %s. This may be corrected after running " + "nova-manage placement sync_aggregates.") + mock_log.assert_called_with(msg, hostname, err) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + @mock.patch.object(compute_api.LOG, 'warning') + def test_aggregate_add_host_bad_placement( + self, mock_log, mock_pc_add_host): + hostname = 'fake-host' + mock_pc_add_host.side_effect = exception.PlacementAPIConnectFailure + aggregate = self.aggregate_api.create_aggregate( + self.ctxt, 'aggregate', None) + agg_uuid = aggregate.uuid + self.aggregate_api.add_host_to_aggregate( + self.ctxt, aggregate.id, hostname) + # Nothing should blow up in Rocky, but we should get a warning about + # placement connectivity failure + msg = ("Failed to associate %s with a placement " + "aggregate: %s. There was a failure to communicate " + "with the placement service.") + mock_log.assert_called_with(msg, hostname, agg_uuid) + + @mock.patch('nova.objects.Aggregate.delete_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch.object(compute_api.LOG, 'warning') + def test_aggregate_remove_host_bad_placement( + self, mock_log, mock_pc_remove_host, mock_agg_obj_delete_host): + hostname = 'fake-host' + mock_pc_remove_host.side_effect = exception.PlacementAPIConnectFailure + aggregate = self.aggregate_api.create_aggregate( + self.ctxt, 'aggregate', None) + agg_uuid = aggregate.uuid + self.aggregate_api.remove_host_from_aggregate( + self.ctxt, aggregate.id, hostname) + # Nothing should blow up in Rocky, but we should get a warning about + # placement connectivity failure + msg = ("Failed to remove association of %s with a placement " + "aggregate: %s. There was a failure to communicate " + "with the placement service.") + mock_log.assert_called_with(msg, hostname, agg_uuid) + + @mock.patch('nova.objects.Aggregate.delete_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch.object(compute_api.LOG, 'warning') + def test_aggregate_remove_host_placement_missing_provider( + self, mock_log, mock_pc_remove_host, mock_agg_obj_delete_host): + hostname = 'fake-host' + err = exception.ResourceProviderNotFound(name_or_uuid=hostname) + mock_pc_remove_host.side_effect = err + aggregate = self.aggregate_api.create_aggregate( + self.ctxt, 'aggregate', None) + self.aggregate_api.remove_host_from_aggregate( + self.ctxt, aggregate.id, hostname) + # Nothing should blow up in Rocky, but we should get a warning + msg = ("Failed to remove association of %s with a placement " + "aggregate: %s. 
This may be corrected after running " + "nova-manage placement sync_aggregates.") + mock_log.assert_called_with(msg, hostname, err) diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index 688b315e65..f61a0b4067 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -12,6 +12,7 @@ import time +import fixtures from keystoneauth1 import exceptions as ks_exc import mock from six.moves.urllib import parse @@ -3378,3 +3379,199 @@ class TestResourceClass(SchedulerReportClientTestCase): self.mock_put.assert_called_once_with( '/resource_classes/CUSTOM_BAD', None, version='1.7', global_request_id=self.context.global_id) + + +class TestAggregateAddRemoveHost(SchedulerReportClientTestCase): + """Unit tests for the methods of the report client which look up providers + by name and add/remove host aggregates to providers. These methods do not + access the SchedulerReportClient provider_tree attribute and are called + from the nova API, not the nova compute manager/resource tracker. + """ + def setUp(self): + super(TestAggregateAddRemoveHost, self).setUp() + self.mock_get = self.useFixture( + fixtures.MockPatch('nova.scheduler.client.report.' + 'SchedulerReportClient.get')).mock + self.mock_put = self.useFixture( + fixtures.MockPatch('nova.scheduler.client.report.' + 'SchedulerReportClient.put')).mock + + def test_get_provider_by_name_success(self): + get_resp = mock.Mock() + get_resp.status_code = 200 + get_resp.json.return_value = { + "resource_providers": [ + mock.sentinel.expected, + ] + } + self.mock_get.return_value = get_resp + name = 'cn1' + res = self.client._get_provider_by_name(self.context, name) + + exp_url = "/resource_providers?name=%s" % name + self.mock_get.assert_called_once_with( + exp_url, global_request_id=self.context.global_id) + self.assertEqual(mock.sentinel.expected, res) + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_multiple_results(self, mock_log): + """Test that if we find multiple resource providers with the same name, + that a ResourceProviderNotFound is raised (the reason being that >1 + resource provider with a name should never happen...) + """ + get_resp = mock.Mock() + get_resp.status_code = 200 + get_resp.json.return_value = { + "resource_providers": [ + {'uuid': uuids.cn1a}, + {'uuid': uuids.cn1b}, + ] + } + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client._get_provider_by_name, self.context, name) + mock_log.assert_called_once() + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_500(self, mock_log): + get_resp = mock.Mock() + get_resp.status_code = 500 + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client._get_provider_by_name, self.context, name) + mock_log.assert_called_once() + + @mock.patch.object(report.LOG, 'warning') + def test_get_provider_by_name_404(self, mock_log): + get_resp = mock.Mock() + get_resp.status_code = 404 + self.mock_get.return_value = get_resp + name = 'cn1' + self.assertRaises( + exception.ResourceProviderNotFound, + self.client._get_provider_by_name, self.context, name) + mock_log.assert_not_called() + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ '_get_provider_aggregates') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_by_name') + def test_aggregate_add_host_success_no_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + } + agg_uuid = uuids.agg1 + mock_get_aggs.return_value = set([]) + name = 'cn1' + self.client.aggregate_add_host(self.context, agg_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg_uuid]), use_cache=False) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_by_name') + def test_aggregate_add_host_success_already_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + } + agg1_uuid = uuids.agg1 + agg2_uuid = uuids.agg2 + agg3_uuid = uuids.agg3 + mock_get_aggs.return_value = set([agg1_uuid]) + name = 'cn1' + self.client.aggregate_add_host(self.context, agg1_uuid, name) + mock_set_aggs.assert_not_called() + mock_get_aggs.reset_mock() + mock_set_aggs.reset_mock() + mock_get_aggs.return_value = set([agg1_uuid, agg3_uuid]) + self.client.aggregate_add_host(self.context, agg2_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg1_uuid, agg2_uuid, agg3_uuid]), + use_cache=False) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_by_name') + def test_aggregate_add_host_no_placement(self, mock_get_by_name): + """In Rocky, we allow nova-api to not be able to communicate with + placement, so the @safe_connect decorator will return None. Check that + an appropriate exception is raised back to the nova-api code in this + case. + """ + mock_get_by_name.return_value = None # emulate @safe_connect... + name = 'cn1' + agg_uuid = uuids.agg1 + self.assertRaises( + exception.PlacementAPIConnectFailure, + self.client.aggregate_add_host, self.context, agg_uuid, name) + self.mock_get.assert_not_called() + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_by_name') + def test_aggregate_remove_host_no_placement(self, mock_get_by_name): + """In Rocky, we allow nova-api to not be able to communicate with + placement, so the @safe_connect decorator will return None. Check that + an appropriate exception is raised back to the nova-api code in this + case. + """ + mock_get_by_name.return_value = None # emulate @safe_connect... + name = 'cn1' + agg_uuid = uuids.agg1 + self.assertRaises( + exception.PlacementAPIConnectFailure, + self.client.aggregate_remove_host, self.context, agg_uuid, name) + self.mock_get.assert_not_called() + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ '_get_provider_by_name') + def test_aggregate_remove_host_success_already_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + } + agg_uuid = uuids.agg1 + mock_get_aggs.return_value = set([agg_uuid]) + name = 'cn1' + self.client.aggregate_remove_host(self.context, agg_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([]), use_cache=False) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'set_aggregates_for_provider') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_aggregates') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + '_get_provider_by_name') + def test_aggregate_remove_host_success_no_existing( + self, mock_get_by_name, mock_get_aggs, mock_set_aggs): + mock_get_by_name.return_value = { + 'uuid': uuids.cn1, + } + agg1_uuid = uuids.agg1 + agg2_uuid = uuids.agg2 + agg3_uuid = uuids.agg3 + mock_get_aggs.return_value = set([]) + name = 'cn1' + self.client.aggregate_remove_host(self.context, agg2_uuid, name) + mock_set_aggs.assert_not_called() + mock_get_aggs.reset_mock() + mock_set_aggs.reset_mock() + mock_get_aggs.return_value = set([agg1_uuid, agg2_uuid, agg3_uuid]) + self.client.aggregate_remove_host(self.context, agg2_uuid, name) + mock_set_aggs.assert_called_once_with( + self.context, uuids.cn1, set([agg1_uuid, agg3_uuid]), + use_cache=False) diff --git a/nova/tests/unit/virt/xenapi/test_xenapi.py b/nova/tests/unit/virt/xenapi/test_xenapi.py index 73f81fcc85..18a51bea91 100644 --- a/nova/tests/unit/virt/xenapi/test_xenapi.py +++ b/nova/tests/unit/virt/xenapi/test_xenapi.py @@ -3179,7 +3179,12 @@ class XenAPIAggregateTestCase(stubs.XenAPITestBase): aggregate, 'fake_host') self.assertIn('aggregate in error', str(ex)) - def test_remove_host_from_aggregate_error(self): + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_remove_host') + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'aggregate_add_host') + def test_remove_host_from_aggregate_error( + self, mock_add_host, mock_remove_host): # Ensure we can remove a host from an aggregate even if in error. values = _create_service_entries(self.context) fake_zone = list(values.keys())[0] diff --git a/releasenotes/notes/mirror-host-aggregates-to-placement-597473efa94ee558.yaml b/releasenotes/notes/mirror-host-aggregates-to-placement-597473efa94ee558.yaml new file mode 100644 index 0000000000..4b31d6dbfb --- /dev/null +++ b/releasenotes/notes/mirror-host-aggregates-to-placement-597473efa94ee558.yaml @@ -0,0 +1,21 @@ +--- +features: + - | + We now attempt to mirror the association of a compute host with a host + aggregate into the placement API. When administrators use the ``POST + /os-aggregates/{aggregate_id}/action`` Compute API call to add or remove a + host from an aggregate, the nova-api service will attempt to ensure that a + corresponding record is created in the placement API for the resource + provider (compute host) and host aggregate UUID. + + The nova-api service needs to understand how to connect to the placement + service in order for this mirroring process to work. Administrators should + ensure that there is a ``[placement]`` section in the nova.conf file which + is used by the nova-api service, and that credentials for interacting with + placement are contained in that section. 
+ + If the ``[placement]`` section is missing from the nova-api's nova.conf + file, nothing will break, but warnings will be generated in the nova-api's + log file when administrators associate a compute host with a host + aggregate. This will become a hard failure in the 19.0.0 Stein release.
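
Reviewer note: below is a minimal, self-contained sketch of the read-modify-write flow that the new aggregate_add_host() / aggregate_remove_host() report client methods perform against placement: look up the provider by host name, union or subtract the aggregate UUID, and write the whole set back (with use_cache=False, so the nova-api layer never builds a ProviderTree cache). This is illustrative only and not part of the patch; the FakePlacement class and its in-memory provider map are hypothetical stand-ins for the placement REST API.

class FakePlacement(object):
    """Hypothetical in-memory stand-in for the placement REST API."""

    def __init__(self):
        # Maps provider name -> {'uuid': ..., 'aggregates': set(...)},
        # emulating GET /resource_providers?name=... plus the
        # /resource_providers/{uuid}/aggregates endpoints.
        self.providers = {}

    def get_provider_by_name(self, name):
        try:
            return self.providers[name]
        except KeyError:
            # Stands in for exception.ResourceProviderNotFound
            raise LookupError('No such resource provider %s' % name)

    def aggregate_add_host(self, agg_uuid, host_name):
        rp = self.get_provider_by_name(host_name)
        if agg_uuid in rp['aggregates']:
            return  # already associated; adding again is a no-op
        # Read-modify-write: replace the provider's aggregates with the
        # union, as set_aggregates_for_provider(..., use_cache=False) does.
        rp['aggregates'] = rp['aggregates'] | {agg_uuid}

    def aggregate_remove_host(self, agg_uuid, host_name):
        rp = self.get_provider_by_name(host_name)
        if agg_uuid not in rp['aggregates']:
            return  # not associated; removing again is a no-op
        rp['aggregates'] = rp['aggregates'] - {agg_uuid}


if __name__ == '__main__':
    pc = FakePlacement()
    pc.providers['cn1'] = {'uuid': 'rp-uuid-1', 'aggregates': set()}
    pc.aggregate_add_host('agg-1', 'cn1')
    pc.aggregate_add_host('agg-1', 'cn1')     # idempotent
    assert pc.providers['cn1']['aggregates'] == {'agg-1'}
    pc.aggregate_remove_host('agg-1', 'cn1')
    pc.aggregate_remove_host('agg-1', 'cn1')  # idempotent, matches the tests
    assert pc.providers['cn1']['aggregates'] == set()

Because the whole aggregate set is written back rather than patched, the TODOs in the diff about forwarding the provider generation to set_aggregates_for_provider() matter: without the generation check, two concurrent read-modify-write cycles can silently overwrite each other.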