diff --git a/nova/compute/manager.py b/nova/compute/manager.py index e24ceaae54..9ab103f935 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -521,7 +521,7 @@ class ComputeVirtAPI(virtapi.VirtAPI): class ComputeManager(manager.Manager): """Manages the running instances from creation to destruction.""" - target = messaging.Target(version='5.4') + target = messaging.Target(version='5.5') def __init__(self, compute_driver=None, *args, **kwargs): """Load configuration options and connect to the hypervisor.""" @@ -4803,6 +4803,79 @@ class ComputeManager(manager.Manager): # not re-scheduling six.reraise(*exc_info) + @messaging.expected_exceptions(exception.MigrationPreCheckError) + @wrap_exception() + @wrap_instance_event(prefix='compute') + @wrap_instance_fault + def prep_snapshot_based_resize_at_dest( + self, ctxt, instance, flavor, nodename, migration, limits, + request_spec): + """Performs pre-cross-cell resize resource claim on the dest host. + + This runs on the destination host in a cross-cell resize operation + before the resize is actually started. + + Performs a resize_claim for resources that are not claimed in placement + like PCI devices and NUMA topology. + + Note that this is different from same-cell prep_resize in that this: + + * Does not RPC cast to the source compute, that is orchestrated from + conductor. + * This does not reschedule on failure, conductor handles that since + conductor is synchronously RPC calling this method. As such, the + reverts_task_state decorator is not used on this method. + + :param ctxt: user auth request context + :param instance: the instance being resized + :param flavor: the flavor being resized to (unchanged for cold migrate) + :param nodename: Name of the target compute node + :param migration: nova.objects.Migration object for the operation + :param limits: nova.objects.SchedulerLimits object of resource limits + :param request_spec: nova.objects.RequestSpec object for the operation + :returns: nova.objects.MigrationContext; the migration context created + on the destination host during the resize_claim. + :raises: nova.exception.MigrationPreCheckError if the pre-check + validation fails for the given host selection + """ + LOG.debug('Checking if we can cross-cell migrate instance to this ' + 'host (%s).', self.host, instance=instance) + self._send_prep_resize_notifications( + ctxt, instance, fields.NotificationPhase.START, flavor) + # TODO(mriedem): _update_pci_request_spec_with_allocated_interface_name + # should be called here if the request spec has request group mappings, + # e.g. for things like QoS ports with resource requests. Do it outside + # the try/except so if it raises BuildAbortException we do not attempt + # to reschedule. + try: + # Get the allocations within the try/except block in case we get + # an error so MigrationPreCheckError is raised up. + allocations = self.reportclient.get_allocs_for_consumer( + ctxt, instance.uuid)['allocations'] + # Claim resources on this target host using the new flavor which + # will create the MigrationContext object. Note that in the future + # if we want to do other validation here we should do it within + # the MoveClaim context so we can drop the claim if anything fails. + self.rt.resize_claim( + ctxt, instance, flavor, nodename, migration, allocations, + image_meta=instance.image_meta, limits=limits) + except Exception as ex: + err = six.text_type(ex) + LOG.warning( + 'Cross-cell resize pre-checks failed for this host (%s). ' + 'Cleaning up. Failure: %s', self.host, err, + instance=instance, exc_info=True) + raise exception.MigrationPreCheckError( + reason=(_("Pre-checks failed on host '%(host)s'. " + "Error: %(error)s") % + {'host': self.host, 'error': err})) + finally: + self._send_prep_resize_notifications( + ctxt, instance, fields.NotificationPhase.END, flavor) + + # ResourceTracker.resize_claim() sets instance.migration_context. + return instance.migration_context + @wrap_exception() @reverts_task_state @wrap_instance_event(prefix='compute') diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py index e599a0919c..ff7ecbca58 100644 --- a/nova/compute/rpcapi.py +++ b/nova/compute/rpcapi.py @@ -371,6 +371,7 @@ class ComputeAPI(object): check_can_live_migrate_destination(), and a new drop_move_claim_at_destination() method * 5.4 - Add cache_images() support + * 5.5 - Add prep_snapshot_based_resize_at_dest() ''' VERSION_ALIASES = { @@ -845,6 +846,52 @@ class ComputeAPI(object): cctxt = client.prepare(server=host, version=version) cctxt.cast(ctxt, 'prep_resize', **msg_args) + def prep_snapshot_based_resize_at_dest( + self, ctxt, instance, flavor, nodename, migration, limits, + request_spec, destination): + """Performs pre-cross-cell resize resource claim on the dest host. + + This runs on the destination host in a cross-cell resize operation + before the resize is actually started. + + Performs a resize_claim for resources that are not claimed in placement + like PCI devices and NUMA topology. + + Note that this is different from same-cell prep_resize in that this: + + * Does not RPC cast to the source compute, that is orchestrated from + conductor. + * This does not reschedule on failure, conductor handles that since + conductor is synchronously RPC calling this method. + + :param ctxt: user auth request context + :param instance: the instance being resized + :param flavor: the flavor being resized to (unchanged for cold migrate) + :param nodename: Name of the target compute node + :param migration: nova.objects.Migration object for the operation + :param limits: nova.objects.SchedulerLimits object of resource limits + :param request_spec: nova.objects.RequestSpec object for the operation + :param destination: possible target host for the cross-cell resize + :returns: nova.objects.MigrationContext; the migration context created + on the destination host during the resize_claim. + :raises: nova.exception.MigrationPreCheckError if the pre-check + validation fails for the given host selection or the destination + compute service is too old for this method + :raises: oslo_messaging.exceptions.MessagingTimeout if the pre-check + RPC call times out + """ + version = '5.5' + client = self.router.client(ctxt) + if not client.can_send_version(version): + raise exception.MigrationPreCheckError(reason=_('Compute too old')) + cctxt = client.prepare(server=destination, version=version, + call_monitor_timeout=CONF.rpc_response_timeout, + timeout=CONF.long_rpc_timeout) + return cctxt.call(ctxt, 'prep_snapshot_based_resize_at_dest', + instance=instance, flavor=flavor, nodename=nodename, + migration=migration, limits=limits, + request_spec=request_spec) + def reboot_instance(self, ctxt, instance, block_device_info, reboot_type): version = '5.0' diff --git a/nova/conf/rpc.py b/nova/conf/rpc.py index 68cd4de631..1490200527 100644 --- a/nova/conf/rpc.py +++ b/nova/conf/rpc.py @@ -30,6 +30,7 @@ Operations with RPC calls that utilize this value: * scheduling * enabling/disabling a compute service * image pre-caching +* snapshot-based / cross-cell resize Related options: diff --git a/nova/objects/service.py b/nova/objects/service.py index 68cd3ebc73..e421bac401 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 41 +SERVICE_VERSION = 42 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any @@ -163,6 +163,8 @@ SERVICE_VERSION_HISTORY = ( {'compute_rpc': '5.3'}, # Version 41: Add cache_images() to compute rpcapi (version 5.4) {'compute_rpc': '5.4'}, + # Version 42: Compute RPC version 5.5; +prep_snapshot_based_resize_at_dest + {'compute_rpc': '5.5'}, ) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 100332da9e..7cf05348ed 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -10167,6 +10167,141 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, self.assertEqual(new_dev.address, updated_nw_info[1]['profile']['pci_slot']) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocs_for_consumer') + def test_prep_snapshot_based_resize_at_dest(self, get_allocs): + """Tests happy path for prep_snapshot_based_resize_at_dest""" + # Setup mocks. + flavor = self.instance.flavor + limits = objects.SchedulerLimits() + request_spec = objects.RequestSpec() + # resize_claim normally sets instance.migration_context and returns + # a MoveClaim which is a context manager. Rather than deal with + # mocking a context manager we just set the migration_context on the + # fake instance ahead of time to ensure it is returned as expected. + self.instance.migration_context = objects.MigrationContext() + with test.nested( + mock.patch.object(self.compute, '_send_prep_resize_notifications'), + mock.patch.object(self.compute.rt, 'resize_claim'), + ) as ( + _send_prep_resize_notifications, resize_claim, + ): + # Run the code. + mc = self.compute.prep_snapshot_based_resize_at_dest( + self.context, self.instance, flavor, 'nodename', + self.migration, limits, request_spec) + self.assertIs(mc, self.instance.migration_context) + # Assert the mock calls. + _send_prep_resize_notifications.assert_has_calls([ + mock.call(self.context, self.instance, + fields.NotificationPhase.START, flavor), + mock.call(self.context, self.instance, + fields.NotificationPhase.END, flavor)]) + resize_claim.assert_called_once_with( + self.context, self.instance, flavor, 'nodename', self.migration, + get_allocs.return_value['allocations'], + image_meta=test.MatchType(objects.ImageMeta), limits=limits) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocs_for_consumer') + @mock.patch('nova.compute.utils.add_instance_fault_from_exc') + def test_prep_snapshot_based_resize_at_dest_get_allocs_fails( + self, add_fault, get_allocs): + """Tests that getting allocations fails and ExpectedException + is raised with the MigrationPreCheckError inside. + """ + # Setup mocks. + flavor = self.instance.flavor + limits = objects.SchedulerLimits() + request_spec = objects.RequestSpec() + ex1 = exception.ConsumerAllocationRetrievalFailed( + consumer_uuid=self.instance.uuid, error='oops') + get_allocs.side_effect = ex1 + with test.nested( + mock.patch.object(self.compute, + '_send_prep_resize_notifications'), + mock.patch.object(self.compute.rt, 'resize_claim') + ) as ( + _send_prep_resize_notifications, resize_claim, + ): + # Run the code. + ex2 = self.assertRaises( + messaging.ExpectedException, + self.compute.prep_snapshot_based_resize_at_dest, + self.context, self.instance, flavor, 'nodename', + self.migration, limits, request_spec) + wrapped_exc = ex2.exc_info[1] + # The original error should be in the MigrationPreCheckError which + # itself is in the ExpectedException. + self.assertIn(ex1.format_message(), six.text_type(wrapped_exc)) + # Assert the mock calls. + _send_prep_resize_notifications.assert_has_calls([ + mock.call(self.context, self.instance, + fields.NotificationPhase.START, flavor), + mock.call(self.context, self.instance, + fields.NotificationPhase.END, flavor)]) + resize_claim.assert_not_called() + # Assert the decorators that are triggered on error + add_fault.assert_called_once_with( + self.context, self.instance, wrapped_exc, mock.ANY) + # There would really be three notifications but because we mocked out + # _send_prep_resize_notifications there is just the one error + # notification from the wrap_exception decorator. + self.assertEqual(1, len(fake_notifier.VERSIONED_NOTIFICATIONS)) + self.assertEqual( + 'compute.%s' % fields.NotificationAction.EXCEPTION, + fake_notifier.VERSIONED_NOTIFICATIONS[0]['event_type']) + + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocs_for_consumer') + @mock.patch('nova.compute.utils.add_instance_fault_from_exc') + def test_prep_snapshot_based_resize_at_dest_claim_fails( + self, add_fault, get_allocs): + """Tests that the resize_claim fails and ExpectedException + is raised with the MigrationPreCheckError inside. + """ + # Setup mocks. + flavor = self.instance.flavor + limits = objects.SchedulerLimits() + request_spec = objects.RequestSpec() + ex1 = exception.ComputeResourcesUnavailable(reason='numa') + with test.nested( + mock.patch.object(self.compute, '_send_prep_resize_notifications'), + mock.patch.object(self.compute.rt, 'resize_claim', side_effect=ex1) + ) as ( + _send_prep_resize_notifications, resize_claim, + ): + # Run the code. + ex2 = self.assertRaises( + messaging.ExpectedException, + self.compute.prep_snapshot_based_resize_at_dest, + self.context, self.instance, flavor, 'nodename', + self.migration, limits, request_spec) + wrapped_exc = ex2.exc_info[1] + # The original error should be in the MigrationPreCheckError which + # itself is in the ExpectedException. + self.assertIn(ex1.format_message(), six.text_type(wrapped_exc)) + # Assert the mock calls. + _send_prep_resize_notifications.assert_has_calls([ + mock.call(self.context, self.instance, + fields.NotificationPhase.START, flavor), + mock.call(self.context, self.instance, + fields.NotificationPhase.END, flavor)]) + resize_claim.assert_called_once_with( + self.context, self.instance, flavor, 'nodename', self.migration, + get_allocs.return_value['allocations'], + image_meta=test.MatchType(objects.ImageMeta), limits=limits) + # Assert the decorators that are triggered on error + add_fault.assert_called_once_with( + self.context, self.instance, wrapped_exc, mock.ANY) + # There would really be three notifications but because we mocked out + # _send_prep_resize_notifications there is just the one error + # notification from the wrap_exception decorator. + self.assertEqual(1, len(fake_notifier.VERSIONED_NOTIFICATIONS)) + self.assertEqual( + 'compute.%s' % fields.NotificationAction.EXCEPTION, + fake_notifier.VERSIONED_NOTIFICATIONS[0]['event_type']) + class ComputeManagerInstanceUsageAuditTestCase(test.TestCase): def setUp(self): diff --git a/nova/tests/unit/compute/test_rpcapi.py b/nova/tests/unit/compute/test_rpcapi.py index a11d1bcd89..c366cd9382 100644 --- a/nova/tests/unit/compute/test_rpcapi.py +++ b/nova/tests/unit/compute/test_rpcapi.py @@ -19,10 +19,12 @@ Unit Tests for nova.compute.rpcapi import mock from oslo_serialization import jsonutils from oslo_utils.fixture import uuidsentinel as uuids +import six from nova.compute import rpcapi as compute_rpcapi from nova import context from nova import exception +from nova import objects from nova.objects import block_device as objects_block_dev from nova.objects import migration as migration_obj from nova.objects import service as service_obj @@ -494,6 +496,45 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): node='node', clean_shutdown=True, host_list=None, version='5.1') + def test_prep_snapshot_based_resize_at_dest(self): + """Tests happy path for prep_snapshot_based_resize_at_dest rpc call""" + self.flags(long_rpc_timeout=1234) + self._test_compute_api( + 'prep_snapshot_based_resize_at_dest', 'call', + # compute method kwargs + instance=self.fake_instance_obj, + flavor=self.fake_flavor_obj, + nodename='node', + migration=migration_obj.Migration(), + limits={}, + request_spec=objects.RequestSpec(), + destination='dest', + # client.prepare kwargs + version='5.5', call_monitor_timeout=60, timeout=1234, + # assert the expected return value + _return_value=mock.sentinel.migration_context) + + @mock.patch('nova.rpc.ClientRouter.client') + def test_prep_snapshot_based_resize_at_dest_old_compute(self, mock_client): + """Tests when the destination compute service is too old to call + prep_snapshot_based_resize_at_dest so MigrationPreCheckError is + raised. + """ + mock_client.return_value.can_send_version.return_value = False + rpcapi = compute_rpcapi.ComputeAPI() + ex = self.assertRaises( + exception.MigrationPreCheckError, + rpcapi.prep_snapshot_based_resize_at_dest, + self.context, + instance=self.fake_instance_obj, + flavor=self.fake_flavor_obj, + nodename='node', + migration=migration_obj.Migration(), + limits={}, + request_spec=objects.RequestSpec(), + destination='dest') + self.assertIn('Compute too old', six.text_type(ex)) + def test_reboot_instance(self): self.maxDiff = None self._test_compute_api('reboot_instance', 'cast',