diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index 77414b5d18..c97647be47 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -166,9 +166,29 @@ class PciResourceProvider: self.resource_class = _get_rc_for_dev(dev, dev_spec_tags) self.traits = _get_traits_for_dev(dev_spec_tags) + def remove_child(self, dev: pci_device.PciDevice) -> None: + # Nothing to do here. The update_provider_tree will handle the + # inventory decrease or the full RP removal + pass + + def remove_parent(self, dev: pci_device.PciDevice) -> None: + # Nothing to do here. The update_provider_tree will handle the full RP removal + pass + def update_provider_tree( self, provider_tree: provider_tree.ProviderTree ) -> None: + + if not self.parent_dev and not self.children_devs: + # This means we need to delete the RP from placement if exists + if provider_tree.exists(self.name): + # NOTE(gibi): If there are allocations on this RP then + # Placement will reject the update when the provider_tree + # is synced up. 
+ provider_tree.remove(self.name) + + return + provider_tree.update_inventory( self.name, # NOTE(gibi): The rest of the inventory fields (reserved, @@ -188,10 +208,13 @@ class PciResourceProvider: provider_tree.update_traits(self.name, self.traits) def __str__(self) -> str: - return ( - f"RP({self.name}, {self.resource_class}={len(self.devs)}, " - f"traits={','.join(self.traits or set())})" - ) + if self.devs: + return ( + f"RP({self.name}, {self.resource_class}={len(self.devs)}, " + f"traits={','.join(self.traits or set())})" + ) + else: + return f"RP({self.name}, )" class PlacementView: @@ -207,9 +230,7 @@ class PlacementView: def _ensure_rp(self, rp_name: str) -> PciResourceProvider: return self.rps.setdefault(rp_name, PciResourceProvider(rp_name)) - def _add_child( - self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str] - ) -> None: + def _get_rp_name_for_child(self, dev: pci_device.PciDevice) -> str: if not dev.parent_addr: msg = _( "Missing parent address for PCI device s(dev)% with " @@ -220,7 +241,12 @@ class PlacementView: } raise exception.PlacementPciException(error=msg) - rp_name = self._get_rp_name_for_address(dev.parent_addr) + return self._get_rp_name_for_address(dev.parent_addr) + + def _add_child( + self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str] + ) -> None: + rp_name = self._get_rp_name_for_child(dev) self._ensure_rp(rp_name).add_child(dev, dev_spec_tags) def _add_parent( @@ -229,7 +255,7 @@ class PlacementView: rp_name = self._get_rp_name_for_address(dev.address) self._ensure_rp(rp_name).add_parent(dev, dev_spec_tags) - def add_dev( + def _add_dev( self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str] ) -> None: if dev_spec_tags.get("physical_network"): @@ -263,6 +289,46 @@ class PlacementView: # check for running migrations. 
pass + def _remove_child(self, dev: pci_device.PciDevice) -> None: + rp_name = self._get_rp_name_for_child(dev) + self._ensure_rp(rp_name).remove_child(dev) + + def _remove_parent(self, dev: pci_device.PciDevice) -> None: + rp_name = self._get_rp_name_for_address(dev.address) + self._ensure_rp(rp_name).remove_parent(dev) + + def _remove_dev(self, dev: pci_device.PciDevice) -> None: + """Remove PCI devices from Placement that existed before but now + deleted from the hypervisor or unlisted from [pci]device_spec + """ + if dev.dev_type in PARENT_TYPES: + self._remove_parent(dev) + elif dev.dev_type in CHILD_TYPES: + self._remove_child(dev) + + def process_dev( + self, + dev: pci_device.PciDevice, + dev_spec: ty.Optional[devspec.PciDeviceSpec], + ) -> None: + + if dev.status in ( + fields.PciDeviceStatus.DELETED, + fields.PciDeviceStatus.REMOVED, + ): + self._remove_dev(dev) + else: + if not dev_spec: + LOG.warning( + "Device spec is not found for device %s in " + "[pci]device_spec. Ignoring device in Placement resource " + "view. This should not happen. Please file a bug.", + dev.address + ) + return + + self._add_dev(dev, dev_spec.get_tags()) + def __str__(self) -> str: return ( f"Placement PCI view on {self.root_rp_name}: " @@ -341,14 +407,7 @@ def update_provider_tree_for_pci( # match the PCI device with the [pci]dev_spec config to access # the configuration metadata tags dev_spec = pci_tracker.dev_filter.get_devspec(dev) - if not dev_spec: - LOG.warning( - "Device spec is not found for device %s in [pci]device_spec. " - "Ignoring device in Placement resource view. " - "This should not happen. 
Please file a bug.", dev.address) - continue - - pv.add_dev(dev, dev_spec.get_tags()) + pv.process_dev(dev, dev_spec) LOG.info("Placement PCI resource view: %s", pv) diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index dce30ec44c..159c4ca6a2 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -338,3 +338,341 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): "instead.", str(ex) ) + + def test_remove_pci(self): + # The fake libvirt will emulate on the host: + # * one type-PCI + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=1, num_pfs=0, num_vfs=0) + # the config matches that PCI dev + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PCI_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.PCI_RC: 1}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + # now un-configure the PCI device and restart the compute + self.flags(group='pci', device_spec=self._to_device_spec_conf([])) + self.restart_compute_service(hostname="compute1") + + # the RP had no allocation so nova could remove it + self.assert_placement_pci_view( + "compute1", + inventories={}, + traits={}, + ) + + def test_remove_one_vf(self): + # The fake libvirt will emulate on the host: + # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2 + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=2) + # then the config matching the VFs + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + 
self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.VF_RC: 2}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + # remove one of the VFs from the hypervisor and then restart the + # compute + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=1) + self.restart_compute_service( + hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + + # total value is expected to decrease to 1 + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.VF_RC: 1}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + def test_remove_all_vfs(self): + # The fake libvirt will emulate on the host: + # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2 + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=2) + # then the config patches the VFs + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.VF_RC: 2}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + # remove both VFs from the hypervisor and restart the compute + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=0) + self.restart_compute_service( + hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + + # we expect that the RP is deleted + self.assert_placement_pci_view( + "compute1", + inventories={}, + traits={}, + ) + + def test_remove_all_vfs_add_pf(self): + # The fake libvirt will emulate on the host: + # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2 + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=2) + # then the config matches both VFs + device_spec = 
self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.VF_RC: 2}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + # change the config to match the PF but do not match the VFs and + # restart the compute + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.restart_compute_service( + hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + + # we expect that VF inventory is removed and the PF inventory is added + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.PF_RC: 1}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + def test_remove_pf_add_vfs(self): + # The fake libvirt will emulate on the host: + # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2 + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=2) + # then the config only matches the PF + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.PF_RC: 1}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + # remove the PF from the config and add the VFs instead then restart + # the compute + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.restart_compute_service( + 
hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + + # we expect that PF inventory is removed and the VF inventory is added + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.VF_RC: 2}, + }, + traits={ + "0000:81:00.0": [], + }, + ) + + def test_device_reconfiguration(self): + # The fake libvirt will emulate on the host: + # * two type-PFs in slot 0, 1 with two type-VFs each + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=2, num_vfs=4) + # from slot 0 we match the PF only and ignore the VFs + # from slot 1 we match the VFs but ignore the parent PF + device_spec = self._to_device_spec_conf( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.PF_PROD_ID, + "address": "0000:81:00.0", + "traits": ",".join( + [os_traits.HW_NIC_SRIOV, "CUSTOM_PF", "pf-white"] + ), + }, + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + "address": "0000:81:01.*", + "traits": ",".join( + [os_traits.HW_NIC_SRIOV_TRUSTED, "CUSTOM_VF", "vf-red"] + ), + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {self.PF_RC: 1}, + "0000:81:01.0": {self.VF_RC: 2}, + }, + traits={ + "0000:81:00.0": [ + "HW_NIC_SRIOV", + "CUSTOM_PF", + "CUSTOM_PF_WHITE", + ], + "0000:81:01.0": [ + "HW_NIC_SRIOV_TRUSTED", + "CUSTOM_VF", + "CUSTOM_VF_RED", + ], + }, + ) + + # change the resource class and traits configuration and restart the + # compute + device_spec = self._to_device_spec_conf( + [ + { + "product_id": fakelibvirt.PF_PROD_ID, + "resource_class": "CUSTOM_PF", + "address": "0000:81:00.0", + "traits": ",".join( + [os_traits.HW_NIC_SRIOV, "pf-black"] + ), + }, + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + "resource_class": "CUSTOM_VF", + "address": "0000:81:01.*", + "traits": 
",".join( + [os_traits.HW_NIC_SRIOV_TRUSTED, "vf-blue", "foobar"] + ), + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + self.restart_compute_service( + hostname="compute1", + pci_info=pci_info, + keep_hypervisor_state=False + ) + + self.assert_placement_pci_view( + "compute1", + inventories={ + "0000:81:00.0": {"CUSTOM_PF": 1}, + "0000:81:01.0": {"CUSTOM_VF": 2}, + }, + traits={ + "0000:81:00.0": [ + "HW_NIC_SRIOV", + "CUSTOM_PF_BLACK", + ], + "0000:81:01.0": [ + "HW_NIC_SRIOV_TRUSTED", + "CUSTOM_VF_BLUE", + "CUSTOM_FOOBAR", + ], + }, + ) diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py index edaf5b6870..4bde834cbb 100644 --- a/nova/tests/unit/compute/test_pci_placement_translator.py +++ b/nova/tests/unit/compute/test_pci_placement_translator.py @@ -46,7 +46,12 @@ class TestTranslator(test.NoDBTestCase): """ pci_tracker = mock.Mock() pci_tracker.pci_devs = pci_device.PciDeviceList( - objects=[pci_device.PciDevice(address="0000:81:00.0")] + objects=[ + pci_device.PciDevice( + address="0000:81:00.0", + status=fields.PciDeviceStatus.AVAILABLE, + ) + ] ) # So we have a device but there is no spec for it pci_tracker.dev_filter.get_devspec = mock.Mock(return_value=None) @@ -117,10 +122,10 @@ class TestTranslator(test.NoDBTestCase): dev_type=fields.PciDeviceType.SRIOV_VF ) - pv.add_dev(pf, {"resource_class": "foo"}) + pv._add_dev(pf, {"resource_class": "foo"}) ex = self.assertRaises( exception.PlacementPciDependentDeviceException, - pv.add_dev, + pv._add_dev, vf, {"resource_class": "bar"} ) @@ -149,11 +154,11 @@ class TestTranslator(test.NoDBTestCase): dev_type=fields.PciDeviceType.SRIOV_VF ) - pv.add_dev(vf, {"resource_class": "foo"}) - pv.add_dev(vf2, {"resource_class": "foo"}) + pv._add_dev(vf, {"resource_class": "foo"}) + pv._add_dev(vf2, {"resource_class": "foo"}) ex = self.assertRaises( exception.PlacementPciDependentDeviceException, - pv.add_dev, + pv._add_dev, pf, 
{"resource_class": "bar"} ) @@ -175,13 +180,13 @@ class TestTranslator(test.NoDBTestCase): for f in range(0, 4) ] - pv.add_dev(vf1, {"resource_class": "a", "traits": "foo,bar,baz"}) + pv._add_dev(vf1, {"resource_class": "a", "traits": "foo,bar,baz"}) # order is irrelevant - pv.add_dev(vf2, {"resource_class": "a", "traits": "foo,baz,bar"}) + pv._add_dev(vf2, {"resource_class": "a", "traits": "foo,baz,bar"}) # but missing trait is rejected ex = self.assertRaises( exception.PlacementPciMixedTraitsException, - pv.add_dev, + pv._add_dev, vf3, {"resource_class": "a", "traits": "foo,bar"}, ) @@ -195,7 +200,7 @@ class TestTranslator(test.NoDBTestCase): # as well as additional trait ex = self.assertRaises( exception.PlacementPciMixedTraitsException, - pv.add_dev, + pv._add_dev, vf4, {"resource_class": "a", "traits": "foo,bar,baz,extra"} )