From 0d526d1f4bc24d5d6afbdf1b8bb26e9492434944 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 17 Jun 2022 19:02:43 +0200 Subject: [PATCH] Reject PCI dependent device config The PCI tracking in placement does not support the configuration where both a PF and its children VFs are configured for nova usage. This patch adds logic to detect and reject such configuration. To be able to kill the service if started with such config special exception handling is added for the update_available_resource code path, similarly how a failed reshape is handled. blueprint: pci-device-tracking-in-placement Change-Id: I708724465d2afaa37a65c231c64da88fc8b458eb --- doc/source/admin/pci-passthrough.rst | 7 +++ nova/compute/manager.py | 13 ++++ nova/compute/pci_placement_translator.py | 12 ++++ nova/exception.py | 8 +++ .../libvirt/test_pci_in_placement.py | 37 +++++++++++ nova/tests/unit/compute/test_compute_mgr.py | 40 ++++++++++++ .../compute/test_pci_placement_translator.py | 62 +++++++++++++++++++ 7 files changed, 179 insertions(+) diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 26259dcd5c..2fa3619dae 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -367,4 +367,11 @@ resource class can be customized via the ``resource_class`` tag in the tag in that configuration that allows specifying a list of placement traits to be added to the resource provider representing the matching PCI devices. +.. important:: + While nova supported configuring both the PF and its children VFs for PCI + passthrough in the past, it only allowed consuming either the parent PF or + its children VFs. Since 26.0.0. (Zed) the nova-compute service will + enforce the same rule for the configuration as well and will refuse to + start if both the parent PF and its VFs are configured. + For deeper technical details please read the `nova specification. `_ diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 866fe65ff9..246cc92dd5 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -10153,6 +10153,19 @@ class ComputeManager(manager.Manager): # (e.g. disable the service). with excutils.save_and_reraise_exception(): LOG.exception("ReshapeNeeded exception is unexpected here!") + except exception.PlacementPciException: + # If we are at startup and the Placement PCI inventory handling + # failed then probably there is a configuration error. Propagate + # the error up to kill the service. + if startup: + raise + # If we are not at startup then we can assume that the + # configuration was correct at startup so the error is probably + # transient. Anyhow we cannot kill the service any more so just + # log the error and continue. + LOG.exception( + "Error updating PCI resources for node %(node)s.", + {'node': nodename}) except Exception: LOG.exception("Error updating resources for node %(node)s.", {'node': nodename}) diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py index 130e0b1938..d90c70428b 100644 --- a/nova/compute/pci_placement_translator.py +++ b/nova/compute/pci_placement_translator.py @@ -124,6 +124,12 @@ class PciResourceProvider: return [self.parent_dev] if self.parent_dev else self.children_devs def add_child(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None: + if self.parent_dev: + raise exception.PlacementPciDependentDeviceException( + parent_dev=dev.address, + children_devs=",".join(dev.address for dev in self.devs) + ) + rc = _get_rc_for_dev(dev, dev_spec_tags) traits = _get_traits_for_dev(dev_spec_tags) self.children_devs.append(dev) @@ -131,6 +137,12 @@ class PciResourceProvider: self.traits = traits def add_parent(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None: + if self.parent_dev or self.children_devs: + raise exception.PlacementPciDependentDeviceException( + parent_dev=dev.address, + children_devs=",".join(dev.address for dev in self.devs) + ) + self.parent_dev = dev self.resource_class = _get_rc_for_dev(dev, dev_spec_tags) self.traits = _get_traits_for_dev(dev_spec_tags) diff --git a/nova/exception.py b/nova/exception.py index 58aced18c5..5d167bd5d4 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2432,3 +2432,11 @@ class ProviderConfigException(NovaException): class PlacementPciException(NovaException): msg_fmt = _( "Failed to gather or report PCI resources to Placement: %(error)s") + + +class PlacementPciDependentDeviceException(PlacementPciException): + msg_fmt = _( + "Configuring both %(parent_dev)s and %(children_devs)s in " + "[pci]device_spec is not supported. Either the parent PF or its " + "children VFs can be configured." + ) diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index 83a3401d6a..2655beabbe 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -20,6 +20,7 @@ from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import jsonutils +from nova import exception from nova.tests.fixtures import libvirt as fakelibvirt from nova.tests.functional.libvirt import test_pci_sriov_servers @@ -154,6 +155,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): }, # slot 1 func 0 is the type-PF dev. The child VF is ignored { + "product_id": fakelibvirt.PF_PROD_ID, "address": "0000:81:01.0", "resource_class": "crypto", "traits": "to-the-moon,hodl" @@ -179,3 +181,38 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase): ], }, ) + + def test_dependent_device_config_is_rejected(self): + """Configuring both the PF and its children VFs is not supported. + Only either of them can be given to nova. + """ + # The fake libvirt will emulate on the host: + # * one type-PF dev in slot 0 with a single type-VF under it + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=1) + # both device will be matched by our config + device_spec = self._to_device_spec_conf( + [ + # PF + { + "address": "0000:81:00.0" + }, + # Its child VF + { + "address": "0000:81:00.1" + }, + ] + ) + self.flags(group='pci', device_spec=device_spec) + + ex = self.assertRaises( + exception.PlacementPciException, + self.start_compute, + hostname="compute1", + pci_info=pci_info + ) + self.assertIn( + "Configuring both 0000:81:00.1 and 0000:81:00.0 in " + "[pci]device_spec is not supported", + str(ex) + ) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 6725d8a27a..b9cacfa82e 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -348,6 +348,46 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.context, mock.sentinel.node, startup=True) log_mock.exception.assert_called_once() + def test_update_available_resource_for_node_pci_placement_failed_startup( + self + ): + """If the PCI placement translation failed during startup then the + exception is raised up to kill the service + """ + rt = self._mock_rt(spec_set=['update_available_resource']) + rt.update_available_resource.side_effect = ( + exception.PlacementPciException(error='error')) + + self.assertRaises( + exception.PlacementPciException, + self.compute._update_available_resource_for_node, + self.context, + mock.sentinel.node, + startup=True, + ) + rt.update_available_resource.assert_called_once_with( + self.context, mock.sentinel.node, startup=True) + + @mock.patch('nova.compute.manager.LOG') + def test_update_available_resource_for_node_pci_placement_failed_later( + self, mock_log + ): + """If the PCI placement translation failed later (not at startup) + during a periodic then the exception is just logged + """ + rt = self._mock_rt(spec_set=['update_available_resource']) + rt.update_available_resource.side_effect = ( + exception.PlacementPciException(error='error')) + + self.compute._update_available_resource_for_node( + self.context, mock.sentinel.node, startup=False) + rt.update_available_resource.assert_called_once_with( + self.context, mock.sentinel.node, startup=False) + mock_log.exception.assert_called_once_with( + 'Error updating PCI resources for node %(node)s.', + {'node': mock.sentinel.node} + ) + @mock.patch.object(manager, 'LOG') @mock.patch.object(manager.ComputeManager, '_update_available_resource_for_node') diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py index f65600a164..da06274637 100644 --- a/nova/tests/unit/compute/test_pci_placement_translator.py +++ b/nova/tests/unit/compute/test_pci_placement_translator.py @@ -15,6 +15,8 @@ import ddt from unittest import mock from nova.compute import pci_placement_translator as ppt +from nova import exception +from nova.objects import fields from nova.objects import pci_device from nova import test @@ -101,3 +103,63 @@ class TestTranslator(test.NoDBTestCase): expected_rc, ppt._get_rc_for_dev(pci_dev, {"resource_class": rc_name}) ) + + def test_dependent_device_pf_then_vf(self): + pv = ppt.PlacementView("fake-node") + pf = pci_device.PciDevice( + address="0000:81:00.0", + dev_type=fields.PciDeviceType.SRIOV_PF + ) + vf = pci_device.PciDevice( + address="0000:81:00.1", + parent_addr=pf.address, + dev_type=fields.PciDeviceType.SRIOV_VF + ) + + pv.add_dev(pf, {"resource_class": "foo"}) + ex = self.assertRaises( + exception.PlacementPciDependentDeviceException, + pv.add_dev, + vf, + {"resource_class": "bar"} + ) + + self.assertEqual( + "Configuring both 0000:81:00.1 and 0000:81:00.0 in " + "[pci]device_spec is not supported. Either the parent PF or its " + "children VFs can be configured.", + str(ex), + ) + + def test_dependent_device_vf_then_pf(self): + pv = ppt.PlacementView("fake-node") + pf = pci_device.PciDevice( + address="0000:81:00.0", + dev_type=fields.PciDeviceType.SRIOV_PF + ) + vf = pci_device.PciDevice( + address="0000:81:00.1", + parent_addr=pf.address, + dev_type=fields.PciDeviceType.SRIOV_VF + ) + vf2 = pci_device.PciDevice( + address="0000:81:00.2", + parent_addr=pf.address, + dev_type=fields.PciDeviceType.SRIOV_VF + ) + + pv.add_dev(vf, {"resource_class": "foo"}) + pv.add_dev(vf2, {"resource_class": "foo"}) + ex = self.assertRaises( + exception.PlacementPciDependentDeviceException, + pv.add_dev, + pf, + {"resource_class": "bar"} + ) + + self.assertEqual( + "Configuring both 0000:81:00.0 and 0000:81:00.1,0000:81:00.2 in " + "[pci]device_spec is not supported. Either the parent PF or its " + "children VFs can be configured.", + str(ex), + )