Stop if tracking is disable after it was enabled before
We don't support enabling and then later disabling [pci]report_in_placement. This patch adds a check that kills the nova-compute service is such situation. The implementation rely on an extra trait COMPUTE_MANAGED_PCI_DEVICE that is added to every PCI RP automatically. Note that we cannot use the existing OWNER_NOVA standard trait as that will be exists on other RPs in RP three. Also we cannot use the naming scheme of the PCI RPs as cyborg is free to use the same scheme and also nova might want to use that for VGPUs / MDEVs in the future. Depends-On: https://review.opendev.org/c/openstack/os-traits/+/850538 blueprint: pci-device-tracking-in-placement Change-Id: I6e68d7754cee51525894c14a74a554d82a648d8d
This commit is contained in:
@@ -73,10 +73,12 @@ def _get_traits_for_dev(
|
||||
# traits is a comma separated list of placement trait names
|
||||
traits_str = dev_spec_tags.get("traits")
|
||||
if not traits_str:
|
||||
return set()
|
||||
return {os_traits.COMPUTE_MANAGED_PCI_DEVICE}
|
||||
|
||||
traits = traits_str.split(',')
|
||||
return set(_normalize_traits(traits))
|
||||
return set(_normalize_traits(traits)) | {
|
||||
os_traits.COMPUTE_MANAGED_PCI_DEVICE
|
||||
}
|
||||
|
||||
|
||||
def _get_rc_for_dev(
|
||||
@@ -211,7 +213,7 @@ class PciResourceProvider:
|
||||
if self.devs:
|
||||
return (
|
||||
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
|
||||
f"traits={','.join(self.traits or set())})"
|
||||
f"traits={','.join(sorted(self.traits or set()))})"
|
||||
)
|
||||
else:
|
||||
return f"RP({self.name}, <EMPTY>)"
|
||||
@@ -357,6 +359,26 @@ def ensure_no_dev_spec_with_devname(dev_specs: ty.List[devspec.PciDeviceSpec]):
|
||||
raise exception.PlacementPciException(error=msg)
|
||||
|
||||
|
||||
def ensure_tracking_was_not_enabled_before(
|
||||
provider_tree: provider_tree.ProviderTree
|
||||
) -> None:
|
||||
# If placement tracking was enabled before then we do not support
|
||||
# disabling it later. To check for that we can look for RPs with
|
||||
# the COMPUTE_MANAGED_PCI_DEVICE trait. If any then we raise to
|
||||
# kill the service
|
||||
for rp_uuid in provider_tree.get_provider_uuids():
|
||||
if (
|
||||
os_traits.COMPUTE_MANAGED_PCI_DEVICE
|
||||
in provider_tree.data(rp_uuid).traits
|
||||
):
|
||||
msg = _(
|
||||
"The [pci]report_in_placement is False but it was enabled "
|
||||
"before on this compute. Nova does not support disabling "
|
||||
"it after it is enabled."
|
||||
)
|
||||
raise exception.PlacementPciException(error=msg)
|
||||
|
||||
|
||||
def update_provider_tree_for_pci(
|
||||
provider_tree: provider_tree.ProviderTree,
|
||||
nodename: str,
|
||||
@@ -393,6 +415,7 @@ def update_provider_tree_for_pci(
|
||||
}
|
||||
"""
|
||||
if not _is_placement_tracking_enabled():
|
||||
ensure_tracking_was_not_enabled_before(provider_tree)
|
||||
# If tracking is not enabled we just return without touching anything
|
||||
return False
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
|
||||
return_value=True
|
||||
)
|
||||
self.addCleanup(patcher.stop)
|
||||
patcher.start()
|
||||
self.mock_pci_report_in_placement = patcher.start()
|
||||
|
||||
# These tests should not depend on the host's sysfs
|
||||
self.useFixture(
|
||||
@@ -286,8 +286,9 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
|
||||
)
|
||||
self.assertIn(
|
||||
"VFs from the same PF cannot be configured with different set of "
|
||||
"'traits' in [pci]device_spec. We got CUSTOM_BAR for 0000:81:00.2 "
|
||||
"and CUSTOM_FOO for 0000:81:00.1.",
|
||||
"'traits' in [pci]device_spec. We got "
|
||||
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR for 0000:81:00.2 and "
|
||||
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_FOO for 0000:81:00.1.",
|
||||
str(ex)
|
||||
)
|
||||
|
||||
@@ -676,3 +677,74 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
def test_reporting_disabled_nothing_is_reported(self):
|
||||
# The fake libvirt will emulate on the host:
|
||||
# * one type-PCI in slot 0
|
||||
pci_info = fakelibvirt.HostPCIDevicesInfo(
|
||||
num_pci=1, num_pfs=0, num_vfs=0)
|
||||
# the config matches the PCI dev
|
||||
device_spec = self._to_device_spec_conf(
|
||||
[
|
||||
{
|
||||
"vendor_id": fakelibvirt.PCI_VEND_ID,
|
||||
"product_id": fakelibvirt.PCI_PROD_ID,
|
||||
},
|
||||
]
|
||||
)
|
||||
self.flags(group='pci', device_spec=device_spec)
|
||||
# Disable placement reporting so even if there are PCI devices on the
|
||||
# hypervisor matching the [pci]device_spec config they are not reported
|
||||
# to Placement
|
||||
self.mock_pci_report_in_placement.return_value = False
|
||||
self.start_compute(hostname="compute1", pci_info=pci_info)
|
||||
|
||||
self.assert_placement_pci_view(
|
||||
"compute1",
|
||||
inventories={},
|
||||
traits={},
|
||||
)
|
||||
|
||||
def test_reporting_cannot_be_disable_once_it_is_enabled(self):
|
||||
# The fake libvirt will emulate on the host:
|
||||
# * one type-PCI in slot 0
|
||||
pci_info = fakelibvirt.HostPCIDevicesInfo(
|
||||
num_pci=1, num_pfs=0, num_vfs=0)
|
||||
# the config matches the PCI dev
|
||||
device_spec = self._to_device_spec_conf(
|
||||
[
|
||||
{
|
||||
"vendor_id": fakelibvirt.PCI_VEND_ID,
|
||||
"product_id": fakelibvirt.PCI_PROD_ID,
|
||||
},
|
||||
]
|
||||
)
|
||||
self.flags(group='pci', device_spec=device_spec)
|
||||
self.start_compute(hostname="compute1", pci_info=pci_info)
|
||||
|
||||
self.assert_placement_pci_view(
|
||||
"compute1",
|
||||
inventories={
|
||||
"0000:81:00.0": {self.PCI_RC: 1},
|
||||
},
|
||||
traits={
|
||||
"0000:81:00.0": [],
|
||||
},
|
||||
)
|
||||
|
||||
# Try to disable placement reporting. The compute will refuse to start
|
||||
# as there are already PCI device RPs in placement.
|
||||
self.mock_pci_report_in_placement.return_value = False
|
||||
ex = self.assertRaises(
|
||||
exception.PlacementPciException,
|
||||
self.restart_compute_service,
|
||||
hostname="compute1",
|
||||
pci_info=pci_info,
|
||||
keep_hypervisor_state=False,
|
||||
)
|
||||
self.assertIn(
|
||||
"The [pci]report_in_placement is False but it was enabled before "
|
||||
"on this compute. Nova does not support disabling it after it is "
|
||||
"enabled.",
|
||||
str(ex)
|
||||
)
|
||||
|
||||
@@ -113,7 +113,9 @@ class _PCIServersTestBase(base.ServersTestBase):
|
||||
|
||||
rp_traits = self._get_provider_traits(rp['uuid'])
|
||||
self.assertEqual(
|
||||
set(traits[rp_name]),
|
||||
# COMPUTE_MANAGED_PCI_DEVICE is automatically reported on
|
||||
# PCI device RPs by nova
|
||||
set(traits[rp_name]) | {"COMPUTE_MANAGED_PCI_DEVICE"},
|
||||
set(rp_traits),
|
||||
f"Traits on RP {real_rp_name} does not match with expectation"
|
||||
)
|
||||
|
||||
@@ -87,7 +87,9 @@ class TestTranslator(test.NoDBTestCase):
|
||||
)
|
||||
def test_trait_normalization(self, trait_names, expected_traits):
|
||||
self.assertEqual(
|
||||
expected_traits, ppt._get_traits_for_dev({"traits": trait_names}))
|
||||
expected_traits | {"COMPUTE_MANAGED_PCI_DEVICE"},
|
||||
ppt._get_traits_for_dev({"traits": trait_names})
|
||||
)
|
||||
|
||||
@ddt.unpack
|
||||
@ddt.data(
|
||||
@@ -192,9 +194,11 @@ class TestTranslator(test.NoDBTestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
"VFs from the same PF cannot be configured with different set of "
|
||||
"'traits' in [pci]device_spec. We got CUSTOM_BAR,CUSTOM_FOO for "
|
||||
"0000:81:00.2 and CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for "
|
||||
"0000:81:00.0,0000:81:00.1.",
|
||||
"'traits' in [pci]device_spec. We got "
|
||||
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_FOO for "
|
||||
"0000:81:00.2 and "
|
||||
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO "
|
||||
"for 0000:81:00.0,0000:81:00.1.",
|
||||
str(ex),
|
||||
)
|
||||
# as well as additional trait
|
||||
@@ -207,8 +211,8 @@ class TestTranslator(test.NoDBTestCase):
|
||||
self.assertEqual(
|
||||
"VFs from the same PF cannot be configured with different set of "
|
||||
"'traits' in [pci]device_spec. We got "
|
||||
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_EXTRA,CUSTOM_FOO for 0000:81:00.3 "
|
||||
"and CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for "
|
||||
"0000:81:00.0,0000:81:00.1.",
|
||||
"COMPUTE_MANAGED_PCI_DEVICE,CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_EXTRA,"
|
||||
"CUSTOM_FOO for 0000:81:00.3 and COMPUTE_MANAGED_PCI_DEVICE,"
|
||||
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for 0000:81:00.0,0000:81:00.1.",
|
||||
str(ex),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user