From 489aab934cc504b89201d70217466d4f53868768 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Wed, 20 Dec 2023 15:56:59 +0100 Subject: [PATCH] Check if destination can support the src mdev types Now that the source knows that both the computes support the right libvirt version, it passes to the destination the list of mdevs it has for the instance. By this change, we'll verify if the types of those mdevs are actually supported by the destination. On the next change, we'll pass the destination mdevs back to the source. Partially-Implements: blueprint libvirt-mdev-live-migrate Change-Id: Icb52fa5eb0adc0aa6106a90d87149456b39e79c2 --- nova/compute/manager.py | 4 +++ nova/tests/functional/libvirt/test_vgpu.py | 28 ++++++++++++++++ nova/tests/unit/compute/test_compute_mgr.py | 12 ++++++- nova/tests/unit/virt/libvirt/test_driver.py | 37 +++++++++++++++++++++ nova/virt/driver.py | 16 +++++++++ nova/virt/libvirt/driver.py | 31 +++++++++++++++++ 6 files changed, 127 insertions(+), 1 deletion(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index c7b25ac26a..2b4c7bebc9 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8567,6 +8567,10 @@ class ComputeManager(manager.Manager): # Update migrate VIFs with the newly claimed PCI devices self._update_migrate_vifs_profile_with_pci( migrate_data.vifs, port_id_to_pci) + # This should always be the last check as we persist some internal + # dictionary value in the libvirt driver. + migrate_data = self.driver.check_source_migrate_data_at_dest( + ctxt, instance, migrate_data, migration, limits, allocs) finally: self.driver.cleanup_live_migration_destination_check(ctxt, dest_check_data) diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index ac04d55553..8764dc5d84 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -540,6 +540,34 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase): '' % server['id'], log_out) + def test_live_migration_fails_due_to_non_supported_mdev_types(self): + self.flags( + enabled_mdev_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE], + group='devices') + self.src = self.restart_compute_service(self.src.host) + self.flags( + enabled_mdev_types=[fakelibvirt.NVIDIA_12_VGPU_TYPE], + group='devices') + self.dest = self.restart_compute_service(self.dest.host) + # Force a periodic run in order to make sure all service resources + # are changed before we call create_service() + self._run_periodics() + self.server = self._create_server( + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, networks='auto', host=self.src.host) + # now live migrate that server + ex = self.assertRaises( + client.OpenStackApiException, + self._live_migrate, + self.server, 'completed') + self.assertEqual(500, ex.response.status_code) + self.assertIn('NoValidHost', str(ex)) + log_out = self.stdlog.logger.output + # The log is fully JSON-serialized, so just check the phrase. + self.assertIn('Unable to migrate %s: ' % self.server['id'], log_out) + self.assertIn('Source mdev types ', log_out) + self.assertIn('are not supported by this compute : ', log_out) + def test_live_migrate_server(self): self.server = self._create_server( image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 774be3b788..fe49e12b58 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -3576,10 +3576,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, '_update_migrate_vifs_profile_with_pci'), mock.patch.object(self.compute, '_dest_can_numa_live_migrate'), mock.patch('nova.compute.manager.LOG'), + mock.patch.object(self.compute.driver, + 'check_source_migrate_data_at_dest'), ) as (mock_lm_claim, mock_get, mock_check_dest, mock_check_src, mock_check_clean, mock_fault_create, mock_event, mock_create_mig_vif, mock_nw_info, mock_claim_pci, - mock_update_mig_vif, mock_dest_can_numa, mock_log): + mock_update_mig_vif, mock_dest_can_numa, mock_log, + mock_post_check): mock_get.side_effect = (src_info, dest_info) mock_check_dest.return_value = dest_check_data mock_dest_can_numa.return_value = dest_check_data @@ -3587,6 +3590,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, dst_numa_info=objects.LibvirtLiveMigrateNUMAInfo()) mock_lm_claim.return_value = post_claim_md + # In the case of a NUMA related claim, migrate_data is updated + post_claim_mig_data = post_claim_md if src_numa_lm else mig_data + mock_post_check.return_value = post_claim_mig_data + if do_raise: mock_check_src.side_effect = test.TestingException mock_fault_create.return_value = \ @@ -3631,6 +3638,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, mock_event.assert_called_once_with( self.context, 'compute_check_can_live_migrate_destination', CONF.host, instance.uuid, graceful_exit=False) + mock_post_check.assert_called_once_with( + self.context, instance, post_claim_mig_data, migration, limits, + None) return result @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock( diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 707d171c9b..070bf9eaa6 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -11699,6 +11699,43 @@ class LibvirtConnTestCase(test.NoDBTestCase, for vif in result.vifs: self.assertTrue(vif.supports_os_vif_delegation) + def test_check_source_migrate_data_at_dest_no_src_mdevs(self): + """Noop test in case no mdevs from the source.""" + instance_ref = objects.Instance(**self.test_instance) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + dest_check_data = objects.LibvirtLiveMigrateData( + filename="file", + block_migration=True, + disk_over_commit=False, + disk_available_mb=1024) + allocations = {uuids.rp1: {'resources': {orc.VGPU: 1}}} + result = drvr.check_source_migrate_data_at_dest( + self.context, instance_ref, dest_check_data, None, None, + allocations) + self.assertEqual(dest_check_data, result) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_get_supported_vgpu_types') + def test_check_source_migrate_data_at_dest_types_not_supported(self, + mock_get): + """Raises an exception if the destination doesn't support the source + mdev types. + """ + mock_get.return_value = ['type1'] + instance_ref = objects.Instance(**self.test_instance) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + dest_check_data = objects.LibvirtLiveMigrateData( + filename="file", + block_migration=True, + disk_over_commit=False, + disk_available_mb=1024, + source_mdev_types={uuids.mdev1: 'wrongtype'}) + allocations = {uuids.rp1: {'resources': {orc.VGPU: 1}}} + self.assertRaises(exception.MigrationPreCheckError, + drvr.check_source_migrate_data_at_dest, + self.context, instance_ref, dest_check_data, + None, None, allocations) + @mock.patch.object(fakelibvirt.Connection, 'getLibVersion') @mock.patch.object(fakelibvirt.Connection, 'getVersion') def _test_host_can_support_mdev_lm(self, mock_getversion, diff --git a/nova/virt/driver.py b/nova/virt/driver.py index c24b80826e..79cb6d77f5 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -1322,6 +1322,22 @@ class ComputeDriver(object): """ raise NotImplementedError() + def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data, + migration, limits, allocs): + """Runs the last checks on the destination after the source returned + the migrate_data. + + :param ctxt: security context + :param instance: nova.db.main.models.Instance + :param migrate_data: result of check_can_live_migrate_source + :param migration: The Migration object for this live migration + :param limits: The SchedulerLimits object for this live migration + :param allocs: Allocations for this instance + :returns: a LibvirtLiveMigrateData object + :raises: MigrationPreCheckError + """ + return migrate_data + def post_claim_migrate_data(self, context, instance, migrate_data, claim): """Returns migrate_data augmented with any information obtained from the claim. Intended to run on the destination of a live-migration diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 23908369b3..707a4147bb 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -9771,6 +9771,37 @@ class LibvirtDriver(driver.ComputeDriver): return data + def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data, + migration, limits, allocs): + """Runs the last checks on the destination after the source returned + the migrate_data. + + :param ctxt: security context + :param instance: nova.db.main.models.Instance + :param migrate_data: result of check_can_live_migrate_source + :param migration: The Migration object for this live migration + :param limits: The SchedulerLimits object for this live migration + :param allocs: Allocations for this instance + :returns: a LibvirtLiveMigrateData object + :raises: MigrationPreCheckError + """ + if ('source_mdev_types' in migrate_data and + migrate_data.source_mdev_types): + # The instance that needs to be live-migrated has some mdevs + src_mdev_types = migrate_data.source_mdev_types + # As a reminder, src_mdev_types is a dict of mdev UUID and its type + # Are all the types supported by this compute ? + if not all(map(lambda m_type: m_type in self.supported_vgpu_types, + src_mdev_types.values())): + reason = (_('Unable to migrate %(instance_uuid)s: ' + 'Source mdev types %(src_types)s are not ' + 'supported by this compute : %(dest_types)s ' % + {'instance_uuid': instance.uuid, + 'src_types': list(src_mdev_types.values()), + 'dest_types': self.supported_vgpu_types})) + raise exception.MigrationPreCheckError(reason) + return migrate_data + def post_claim_migrate_data(self, context, instance, migrate_data, claim): migrate_data.dst_numa_info = self._get_live_migrate_numa_info( claim.claimed_numa_topology, claim.flavor, claim.image_meta)