Merge "Support [pci]device_spec reconfiguration"

This commit is contained in:
Zuul
2022-08-26 04:58:50 +00:00
committed by Gerrit Code Review
3 changed files with 429 additions and 27 deletions
+76 -17
View File
@@ -166,9 +166,29 @@ class PciResourceProvider:
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
self.traits = _get_traits_for_dev(dev_spec_tags)
def remove_child(self, dev: pci_device.PciDevice) -> None:
    """Handle the removal of a child (VF) device from this RP.

    Intentionally a no-op: update_provider_tree recomputes the state and
    handles the inventory decrease or the full RP removal.
    """
    # Nothing to do here. The update_provider_tree will handle the
    # inventory decrease or the full RP removal
    pass
def remove_parent(self, dev: pci_device.PciDevice) -> None:
    """Handle the removal of a parent (PF / type-PCI) device from this RP.

    Intentionally a no-op: update_provider_tree handles the full RP
    removal.
    """
    # Nothing to do here. The update_provider_tree will handle the full
    # RP removal.
    pass
def update_provider_tree(
self, provider_tree: provider_tree.ProviderTree
) -> None:
if not self.parent_dev and not self.children_devs:
# This means we need to delete the RP from placement if exists
if provider_tree.exists(self.name):
# NOTE(gibi): If there are allocations on this RP then
# Placement will reject the update when the provider_tree is
# synced up.
provider_tree.remove(self.name)
return
provider_tree.update_inventory(
self.name,
# NOTE(gibi): The rest of the inventory fields (reserved,
@@ -188,10 +208,13 @@ class PciResourceProvider:
provider_tree.update_traits(self.name, self.traits)
def __str__(self) -> str:
return (
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
f"traits={','.join(self.traits or set())})"
)
if self.devs:
return (
f"RP({self.name}, {self.resource_class}={len(self.devs)}, "
f"traits={','.join(self.traits or set())})"
)
else:
return f"RP({self.name}, <EMPTY>)"
class PlacementView:
@@ -207,9 +230,7 @@ class PlacementView:
def _ensure_rp(self, rp_name: str) -> PciResourceProvider:
    """Return the resource provider named rp_name, creating an empty
    PciResourceProvider in self.rps first if it does not exist yet.
    """
    # NOTE(review): setdefault eagerly constructs a PciResourceProvider
    # even when rp_name is already present; assumed cheap — confirm the
    # PciResourceProvider.__init__ has no side effects.
    return self.rps.setdefault(rp_name, PciResourceProvider(rp_name))
def _add_child(
self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
) -> None:
def _get_rp_name_for_child(self, dev: pci_device.PciDevice) -> str:
if not dev.parent_addr:
msg = _(
"Missing parent address for PCI device s(dev)% with "
@@ -220,7 +241,12 @@ class PlacementView:
}
raise exception.PlacementPciException(error=msg)
rp_name = self._get_rp_name_for_address(dev.parent_addr)
return self._get_rp_name_for_address(dev.parent_addr)
def _add_child(
    self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
) -> None:
    """Register a child (VF) device under its parent device's RP."""
    parent_rp = self._ensure_rp(self._get_rp_name_for_child(dev))
    parent_rp.add_child(dev, dev_spec_tags)
def _add_parent(
@@ -229,7 +255,7 @@ class PlacementView:
rp_name = self._get_rp_name_for_address(dev.address)
self._ensure_rp(rp_name).add_parent(dev, dev_spec_tags)
def add_dev(
def _add_dev(
self, dev: pci_device.PciDevice, dev_spec_tags: ty.Dict[str, str]
) -> None:
if dev_spec_tags.get("physical_network"):
@@ -263,6 +289,46 @@ class PlacementView:
# check for running migrations.
pass
def _remove_child(self, dev: pci_device.PciDevice) -> None:
    """Drop a child (VF) device from the RP of its parent device."""
    parent_rp = self._ensure_rp(self._get_rp_name_for_child(dev))
    parent_rp.remove_child(dev)
def _remove_parent(self, dev: pci_device.PciDevice) -> None:
    """Drop a parent (PF / type-PCI) device from its own RP."""
    own_rp = self._ensure_rp(self._get_rp_name_for_address(dev.address))
    own_rp.remove_parent(dev)
def _remove_dev(self, dev: pci_device.PciDevice) -> None:
    """Remove a PCI device from the Placement view.

    Covers devices that existed before but are now deleted from the
    hypervisor or unlisted from [pci]device_spec.
    """
    dev_type = dev.dev_type
    # Dispatch on the device type; a device matching neither set is
    # silently left alone.
    if dev_type in PARENT_TYPES:
        self._remove_parent(dev)
    elif dev_type in CHILD_TYPES:
        self._remove_child(dev)
def process_dev(
    self,
    dev: pci_device.PciDevice,
    dev_spec: ty.Optional[devspec.PciDeviceSpec],
) -> None:
    """Fold a single PCI device into the Placement view.

    Deleted or removed devices are dropped from the view. Any other
    device is added based on the tags of its matching [pci]device_spec
    entry; a live device without a matching spec is logged and ignored.
    """
    gone_statuses = (
        fields.PciDeviceStatus.DELETED,
        fields.PciDeviceStatus.REMOVED,
    )
    if dev.status in gone_statuses:
        self._remove_dev(dev)
        return

    if not dev_spec:
        # A tracked, live device should always have a matching spec.
        LOG.warning(
            "Device spec is not found for device %s in "
            "[pci]device_spec. Ignoring device in Placement resource "
            "view. This should not happen. Please file a bug.",
            dev.address
        )
        return

    self._add_dev(dev, dev_spec.get_tags())
def __str__(self) -> str:
return (
f"Placement PCI view on {self.root_rp_name}: "
@@ -341,14 +407,7 @@ def update_provider_tree_for_pci(
# match the PCI device with the [pci]dev_spec config to access
# the configuration metadata tags
dev_spec = pci_tracker.dev_filter.get_devspec(dev)
if not dev_spec:
LOG.warning(
"Device spec is not found for device %s in [pci]device_spec. "
"Ignoring device in Placement resource view. "
"This should not happen. Please file a bug.", dev.address)
continue
pv.add_dev(dev, dev_spec.get_tags())
pv.process_dev(dev, dev_spec)
LOG.info("Placement PCI resource view: %s", pv)
@@ -338,3 +338,341 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
"instead.",
str(ex)
)
def test_remove_pci(self):
    """A configured type-PCI device's RP is deleted from Placement when
    the device is unlisted from [pci]device_spec and the compute service
    is restarted (possible because the RP has no allocations).
    """
    # The fake libvirt will emulate on the host:
    # * one type-PCI
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=1, num_pfs=0, num_vfs=0)
    # the config matches that PCI dev
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.PCI_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.PCI_RC: 1},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
    # now un-configure the PCI device and restart the compute
    self.flags(group='pci', device_spec=self._to_device_spec_conf([]))
    self.restart_compute_service(hostname="compute1")
    # the RP had no allocation so nova could remove it
    self.assert_placement_pci_view(
        "compute1",
        inventories={},
        traits={},
    )
def test_remove_one_vf(self):
    """The VF inventory total decreases (2 -> 1) when one VF disappears
    from the hypervisor and the compute service is restarted.
    """
    # The fake libvirt will emulate on the host:
    # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=2)
    # then the config matches the VFs
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.VF_RC: 2},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
    # remove one of the VFs from the hypervisor and then restart the
    # compute
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=1)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    # total value is expected to decrease to 1
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.VF_RC: 1},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
def test_remove_all_vfs(self):
    """The whole RP is deleted from Placement when every VF providing
    its inventory disappears from the hypervisor.
    """
    # The fake libvirt will emulate on the host:
    # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=2)
    # then the config matches the VFs
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.VF_RC: 2},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
    # remove both VFs from the hypervisor and restart the compute
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=0)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    # we expect that the RP is deleted
    self.assert_placement_pci_view(
        "compute1",
        inventories={},
        traits={},
    )
def test_remove_all_vfs_add_pf(self):
    """Reconfiguring the spec from matching the VFs to matching their
    parent PF swaps the RP's inventory from VF_RC to PF_RC in one
    restart.
    """
    # The fake libvirt will emulate on the host:
    # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=2)
    # then the config matches both VFs
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.VF_RC: 2},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
    # change the config to match the PF but do not match the VFs and
    # restart the compute
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.PF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    # we expect that VF inventory is removed and the PF inventory is added
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.PF_RC: 1},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
def test_remove_pf_add_vfs(self):
    """Reconfiguring the spec from matching the PF to matching its VFs
    swaps the RP's inventory from PF_RC to VF_RC in one restart (inverse
    of test_remove_all_vfs_add_pf).
    """
    # The fake libvirt will emulate on the host:
    # * one type-PFs in slot 0 with two type-VFs 00.1, 00.2
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=1, num_vfs=2)
    # then the config only matches the PF
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.PF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.PF_RC: 1},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
    # remove the PF from the config and add the VFs instead then restart
    # the compute
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    # we expect that PF inventory is removed and the VF inventory is added
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.VF_RC: 2},
        },
        traits={
            "0000:81:00.0": [],
        },
    )
def test_device_reconfiguration(self):
    """Changing resource_class and traits in [pci]device_spec is
    reflected in Placement (custom RCs replace the defaults, trait sets
    are replaced wholesale) after a compute restart.
    """
    # The fake libvirt will emulate on the host:
    # * two type-PFs in slot 0, 1 with two type-VFs each
    pci_info = fakelibvirt.HostPCIDevicesInfo(
        num_pci=0, num_pfs=2, num_vfs=4)
    # from slot 0 we match the PF only and ignore the VFs
    # from slot 1 we match the VFs but ignore the parent PF
    device_spec = self._to_device_spec_conf(
        [
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.PF_PROD_ID,
                "address": "0000:81:00.0",
                "traits": ",".join(
                    [os_traits.HW_NIC_SRIOV, "CUSTOM_PF", "pf-white"]
                ),
            },
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
                "address": "0000:81:01.*",
                "traits": ",".join(
                    [os_traits.HW_NIC_SRIOV_TRUSTED, "CUSTOM_VF", "vf-red"]
                ),
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.start_compute(hostname="compute1", pci_info=pci_info)
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {self.PF_RC: 1},
            "0000:81:01.0": {self.VF_RC: 2},
        },
        traits={
            "0000:81:00.0": [
                "HW_NIC_SRIOV",
                "CUSTOM_PF",
                "CUSTOM_PF_WHITE",
            ],
            "0000:81:01.0": [
                "HW_NIC_SRIOV_TRUSTED",
                "CUSTOM_VF",
                "CUSTOM_VF_RED",
            ],
        },
    )
    # change the resource class and traits configuration and restart the
    # compute
    device_spec = self._to_device_spec_conf(
        [
            {
                "product_id": fakelibvirt.PF_PROD_ID,
                "resource_class": "CUSTOM_PF",
                "address": "0000:81:00.0",
                "traits": ",".join(
                    [os_traits.HW_NIC_SRIOV, "pf-black"]
                ),
            },
            {
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": fakelibvirt.VF_PROD_ID,
                "resource_class": "CUSTOM_VF",
                "address": "0000:81:01.*",
                "traits": ",".join(
                    [os_traits.HW_NIC_SRIOV_TRUSTED, "vf-blue", "foobar"]
                ),
            },
        ]
    )
    self.flags(group='pci', device_spec=device_spec)
    self.restart_compute_service(
        hostname="compute1",
        pci_info=pci_info,
        keep_hypervisor_state=False
    )
    self.assert_placement_pci_view(
        "compute1",
        inventories={
            "0000:81:00.0": {"CUSTOM_PF": 1},
            "0000:81:01.0": {"CUSTOM_VF": 2},
        },
        traits={
            "0000:81:00.0": [
                "HW_NIC_SRIOV",
                "CUSTOM_PF_BLACK",
            ],
            "0000:81:01.0": [
                "HW_NIC_SRIOV_TRUSTED",
                "CUSTOM_VF_BLUE",
                "CUSTOM_FOOBAR",
            ],
        },
    )
@@ -46,7 +46,12 @@ class TestTranslator(test.NoDBTestCase):
"""
pci_tracker = mock.Mock()
pci_tracker.pci_devs = pci_device.PciDeviceList(
objects=[pci_device.PciDevice(address="0000:81:00.0")]
objects=[
pci_device.PciDevice(
address="0000:81:00.0",
status=fields.PciDeviceStatus.AVAILABLE,
)
]
)
# So we have a device but there is no spec for it
pci_tracker.dev_filter.get_devspec = mock.Mock(return_value=None)
@@ -117,10 +122,10 @@ class TestTranslator(test.NoDBTestCase):
dev_type=fields.PciDeviceType.SRIOV_VF
)
pv.add_dev(pf, {"resource_class": "foo"})
pv._add_dev(pf, {"resource_class": "foo"})
ex = self.assertRaises(
exception.PlacementPciDependentDeviceException,
pv.add_dev,
pv._add_dev,
vf,
{"resource_class": "bar"}
)
@@ -149,11 +154,11 @@ class TestTranslator(test.NoDBTestCase):
dev_type=fields.PciDeviceType.SRIOV_VF
)
pv.add_dev(vf, {"resource_class": "foo"})
pv.add_dev(vf2, {"resource_class": "foo"})
pv._add_dev(vf, {"resource_class": "foo"})
pv._add_dev(vf2, {"resource_class": "foo"})
ex = self.assertRaises(
exception.PlacementPciDependentDeviceException,
pv.add_dev,
pv._add_dev,
pf,
{"resource_class": "bar"}
)
@@ -175,13 +180,13 @@ class TestTranslator(test.NoDBTestCase):
for f in range(0, 4)
]
pv.add_dev(vf1, {"resource_class": "a", "traits": "foo,bar,baz"})
pv._add_dev(vf1, {"resource_class": "a", "traits": "foo,bar,baz"})
# order is irrelevant
pv.add_dev(vf2, {"resource_class": "a", "traits": "foo,baz,bar"})
pv._add_dev(vf2, {"resource_class": "a", "traits": "foo,baz,bar"})
# but missing trait is rejected
ex = self.assertRaises(
exception.PlacementPciMixedTraitsException,
pv.add_dev,
pv._add_dev,
vf3,
{"resource_class": "a", "traits": "foo,bar"},
)
@@ -195,7 +200,7 @@ class TestTranslator(test.NoDBTestCase):
# as well as additional trait
ex = self.assertRaises(
exception.PlacementPciMixedTraitsException,
pv.add_dev,
pv._add_dev,
vf4,
{"resource_class": "a", "traits": "foo,bar,baz,extra"}
)