diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py index c28d230932..47a8bbe81c 100644 --- a/nova/tests/functional/libvirt/base.py +++ b/nova/tests/functional/libvirt/base.py @@ -114,7 +114,7 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase): def start_compute( self, hostname='compute1', host_info=None, pci_info=None, mdev_info=None, vdpa_info=None, libvirt_version=None, - qemu_version=None, + qemu_version=None, cell_name=None, connection=None ): """Start a compute service. @@ -124,16 +124,35 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase): :param host_info: A fakelibvirt.HostInfo object for the host. Defaults to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per core, and 16GB of RAM. + :param connection: A fake libvirt connection. You should not provide it + directly. However it is used by restart_compute_service to + implement restart without loosing the hypervisor state. :returns: The hostname of the created service, which can be used to lookup the created service and UUID of the assocaited resource provider. """ + if connection and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either an existing connection instance can be provided or a " + "list of parameters for a new connection" + ) def _start_compute(hostname, host_info): - fake_connection = self._get_connection( - host_info, pci_info, mdev_info, vdpa_info, libvirt_version, - qemu_version, hostname, - ) + if connection: + fake_connection = connection + else: + fake_connection = self._get_connection( + host_info, pci_info, mdev_info, vdpa_info, libvirt_version, + qemu_version, hostname, + ) + # If the compute is configured with PCI devices then we need to # make sure that the stubs around sysfs has the MAC address # information for the PCI PF devices @@ -144,7 +163,8 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase): # actually start the service. orig_con = self.mock_conn.return_value self.mock_conn.return_value = fake_connection - compute = self.start_service('compute', host=hostname) + compute = self.start_service( + 'compute', host=hostname, cell_name=cell_name) # Once that's done, we need to tweak the compute "service" to # make sure it returns unique objects. compute.driver._host.get_connection = lambda: fake_connection @@ -165,6 +185,74 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase): return hostname + def restart_compute_service( + self, + hostname, + host_info=None, + pci_info=None, + mdev_info=None, + vdpa_info=None, + libvirt_version=None, + qemu_version=None, + keep_hypervisor_state=True, + ): + """Stops the service and starts a new one to have realistic restart + + :param hostname: the hostname of the nova-compute service to be + restarted + :param keep_hypervisor_state: If True then we reuse the fake connection + from the existing driver. If False a new connection will be created + based on the other parameters provided + """ + # We are intentionally not calling super() here. Nova's base test class + # defines starting and restarting compute service with a very + # different signatures and also those calls are cannot be made aware of + # the intricacies of the libvirt fixture. So we simply hide that + # implementation. + + if keep_hypervisor_state and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either keep_hypervisor_state=True or a list of libvirt " + "parameters can be provided but not both" + ) + + compute = self.computes.pop(hostname) + self.compute_rp_uuids.pop(hostname) + + # NOTE(gibi): The service interface cannot be used to simulate a real + # service restart as the manager object will not be recreated after a + # service.stop() and service.start() therefore the manager state will + # survive. For example the resource tracker will not be recreated after + # a stop start. The service.kill() call cannot help as it deletes + # the service from the DB which is unrealistic and causes that some + # operation that refers to the killed host (e.g. evacuate) fails. + # So this helper method will stop the original service and then starts + # a brand new compute service for the same host and node. This way + # a new ComputeManager instance will be created and initialized during + # the service startup. + compute.stop() + + # this service was running previously, so we have to make sure that + # we restart it in the same cell + cell_name = self.host_mappings[compute.host].cell_mapping.name + + old_connection = compute.manager.driver._get_connection() + + self.start_compute( + hostname, host_info, pci_info, mdev_info, vdpa_info, + libvirt_version, qemu_version, cell_name, + old_connection if keep_hypervisor_state else None + ) + + return self.computes[hostname] + class LibvirtMigrationMixin(object): """A simple mixin to facilliate successful libvirt live migrations diff --git a/nova/tests/functional/libvirt/test_device_bus_migration.py b/nova/tests/functional/libvirt/test_device_bus_migration.py index 82a0d4556e..3852e31c68 100644 --- a/nova/tests/functional/libvirt/test_device_bus_migration.py +++ b/nova/tests/functional/libvirt/test_device_bus_migration.py @@ -51,7 +51,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase): def _assert_stashed_image_properties_persist(self, server, properties): # Assert the stashed properties persist across a host reboot - self.restart_compute_service(self.compute) + self.restart_compute_service(self.compute_hostname) self._assert_stashed_image_properties(server['id'], properties) # Assert the stashed properties persist across a guest reboot @@ -173,7 +173,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase): self.flags(pointer_model='ps2mouse') # Restart compute to pick up ps2 setting, which means the guest will # not get a prescribed pointer device - self.restart_compute_service(self.compute) + self.restart_compute_service(self.compute_hostname) # Create a server with default image properties default_image_properties1 = { @@ -187,7 +187,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase): # Assert the defaults persist across a host flag change self.flags(pointer_model='usbtablet') # Restart compute to pick up usb setting - self.restart_compute_service(self.compute) + self.restart_compute_service(self.compute_hostname) self._assert_stashed_image_properties( server1['id'], default_image_properties1) @@ -216,7 +216,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase): # https://bugs.launchpad.net/nova/+bug/1866106 self.flags(pointer_model=None) # Restart compute to pick up None setting - self.restart_compute_service(self.compute) + self.restart_compute_service(self.compute_hostname) self._assert_stashed_image_properties( server1['id'], default_image_properties1) self._assert_stashed_image_properties( diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py index 2f3897d6b2..0e504d2df2 100644 --- a/nova/tests/functional/libvirt/test_numa_live_migration.py +++ b/nova/tests/functional/libvirt/test_numa_live_migration.py @@ -206,10 +206,8 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked if pin_dest: @@ -333,10 +331,8 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked. This is a # rollback test, so server_a is expected to remain on host_a. diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index e4c2b9317d..bb428159ad 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -1187,10 +1187,8 @@ class ReshapeForPCPUsTest(NUMAServersTestBase): self.flags(cpu_dedicated_set='0-7', group='compute') self.flags(vcpu_pin_set=None) - computes = {} - for host, compute in self.computes.items(): - computes[host] = self.restart_compute_service(compute) - self.computes = computes + for host in list(self.computes.keys()): + self.restart_compute_service(host) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 2ab9d6318f..5a2b6347eb 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -908,11 +908,8 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase): # Disable SRIOV capabilties in PF and delete the VFs self._disable_sriov_in_pf(pci_info_no_sriov) - fake_connection = self._get_connection(pci_info=pci_info_no_sriov, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute('test_compute0', pci_info=pci_info_no_sriov) + self.compute = self.computes['test_compute0'] ctxt = context.get_admin_context() pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -924,13 +921,9 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase): self.assertEqual(1, len(pci_devices)) self.assertEqual('type-PCI', pci_devices[0].dev_type) - # Update connection with original pci info with sriov PFs - fake_connection = self._get_connection(pci_info=pci_info, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - # Restart the compute service - self.restart_compute_service(self.compute) + # Restart the compute service with sriov PFs + self.restart_compute_service( + self.compute.host, pci_info=pci_info, keep_hypervisor_state=False) # Verify if PCI devices are of type type-PF or type-VF pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -1015,10 +1008,9 @@ class SRIOVAttachDetachTest(_PCIServersTestBase): host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2) pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) - fake_connection = self._get_connection(host_info, pci_info) - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute( + 'test_compute0', host_info=host_info, pci_info=pci_info) + self.compute = self.computes['test_compute0'] # Create server with a port server = self._create_server(networks=[{'port': first_port_id}]) diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index 893672bb82..1f924739e3 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -72,11 +72,11 @@ class VGPUReshapeTests(base.ServersTestBase): # ignore the content of the above HostMdevDeviceInfo self.flags(enabled_mdev_types='', group='devices') - hostname = self.start_compute( + self.hostname = self.start_compute( hostname='compute1', mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs), ) - self.compute = self.computes[hostname] + self.compute = self.computes[self.hostname] # create the VGPU resource in placement manually compute_rp_uuid = self.placement.get( @@ -158,7 +158,7 @@ class VGPUReshapeTests(base.ServersTestBase): allocations[compute_rp_uuid]['resources']) # restart compute which will trigger a reshape - self.compute = self.restart_compute_service(self.compute) + self.compute = self.restart_compute_service(self.hostname) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index e111f50de0..686582120a 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -113,8 +113,8 @@ class VGPUTestBase(base.ServersTestBase): parent=libvirt_parent)}) return uuid - def start_compute(self, hostname): - hostname = super().start_compute( + def start_compute_with_vgpu(self, hostname): + hostname = self.start_compute( pci_info=fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, ), @@ -197,7 +197,7 @@ class VGPUTests(VGPUTestBase): enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, group='devices') - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def assert_vgpu_usage_for_compute(self, compute, expected): self.assert_mdev_usage(compute, expected_amount=expected) @@ -211,7 +211,7 @@ class VGPUTests(VGPUTestBase): def test_resize_servers_with_vgpu(self): # Add another compute for the sake of resizing - self.compute2 = self.start_compute('host2') + self.compute2 = self.start_compute_with_vgpu('host2') server = self._create_server( image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', flavor_id=self.flavor, host=self.compute1.host, @@ -337,7 +337,7 @@ class VGPUMultipleTypesTests(VGPUTestBase): # Prepare traits for later on self._create_trait('CUSTOM_NVIDIA_11') self._create_trait('CUSTOM_NVIDIA_12') - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def test_create_servers_with_vgpu(self): self._create_server( @@ -369,13 +369,12 @@ class VGPUMultipleTypesTests(VGPUTestBase): def test_create_servers_with_specific_type(self): # Regenerate the PCI addresses so both pGPUs now support nvidia-12 - connection = self.computes[ - self.compute1.host].driver._host.get_connection() - connection.pci_info = fakelibvirt.HostPCIDevicesInfo( + pci_info = fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, multiple_gpu_types=True) # Make a restart to update the Resource Providers - self.compute1 = self.restart_compute_service(self.compute1) + self.compute1 = self.restart_compute_service( + self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False) pgpu1_rp_uuid = self._get_provider_uuid_by_name( self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR) pgpu2_rp_uuid = self._get_provider_uuid_by_name( @@ -451,7 +450,7 @@ class DifferentMdevClassesTests(VGPUTestBase): group='mdev_nvidia-12') self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core') - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') # Regenerate the PCI addresses so they can support both mlx5 and # nvidia-12 types connection = self.computes[ @@ -460,7 +459,7 @@ class DifferentMdevClassesTests(VGPUTestBase): num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, generic_types=True) # Make a restart to update the Resource Providers - self.compute1 = self.restart_compute_service(self.compute1) + self.compute1 = self.restart_compute_service('host1') def test_create_servers_with_different_mdev_classes(self): physdev1_rp_uuid = self._get_provider_uuid_by_name( @@ -498,7 +497,7 @@ class DifferentMdevClassesTests(VGPUTestBase): def test_resize_servers_with_mlx5(self): # Add another compute for the sake of resizing - self.compute2 = self.start_compute('host2') + self.compute2 = self.start_compute_with_vgpu('host2') # Regenerate the PCI addresses so they can support both mlx5 and # nvidia-12 types connection = self.computes[ @@ -507,7 +506,7 @@ class DifferentMdevClassesTests(VGPUTestBase): num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, generic_types=True) # Make a restart to update the Resource Providers - self.compute2 = self.restart_compute_service(self.compute2) + self.compute2 = self.restart_compute_service('host2') # Use the new flavor for booting server = self._create_server(