Func test for PCI in placement scheduling

This patch adds various functional test cases showing that the placement
allocation candidate restricts the available PCI pools during the run of
the filter scheduler and that the placement PCI allocation later drives
the PCI claim code in the compute.

blueprint: pci-device-tracking-in-placement
Change-Id: If46ee131a9e5499ae91da93ddaac88aa49182f56
Author: Balazs Gibizer
Date: 2022-08-17 10:40:37 +02:00
parent f86f1800f0
commit 1462883dcc
3 changed files with 350 additions and 7 deletions
@@ -869,6 +869,20 @@ class PlacementHelperMixin:
'Test expected a single migration but found %i' % len(migrations))
return migrations[0].uuid
def _reserve_placement_resource(self, rp_name, rc_name, reserved):
rp_uuid = self._get_provider_uuid_by_name(rp_name)
inv = self.placement.get(
'/resource_providers/%s/inventories/%s' % (rp_uuid, rc_name),
version='1.26'
).body
inv["reserved"] = reserved
result = self.placement.put(
'/resource_providers/%s/inventories/%s' % (rp_uuid, rc_name),
version='1.26', body=inv
).body
self.assertEqual(reserved, result["reserved"])
return result
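This helper marks part of a provider's inventory as reserved so that placement stops offering those units to the scheduler. A minimal usage sketch, matching how the VF split test below steers its first instance:
# reserve both CUSTOM_MY_VF units on 81.01 so allocation candidates
# can only come from 81.00
self._reserve_placement_resource(
    "compute1_0000:81:01.0", "CUSTOM_MY_VF", 2)
# lifting the reservation again is the same call with reserved=0
self._reserve_placement_resource(
    "compute1_0000:81:01.0", "CUSTOM_MY_VF", 0)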
class PlacementInstanceHelperMixin(InstanceHelperMixin, PlacementHelperMixin):
"""A placement-aware variant of InstanceHelperMixin."""
@@ -73,10 +73,6 @@ class PlacementPCIReportingTests(test_pci_sriov_servers._PCIServersTestBase):
)
)
@staticmethod
def _to_device_spec_conf(spec_list):
return [jsonutils.dumps(x) for x in spec_list]
class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
@@ -1623,7 +1619,7 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
def setUp(self):
super().setUp()
# TODO(gibi): replace this with setting the [scheduler]pci_in_placement
# config to True once that config is added
self.mock_pci_in_placement_enabled = self.useFixture(
fixtures.MockPatch(
@@ -1742,3 +1738,196 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")
def test_device_claim_consistent_with_placement_allocation(self):
"""As soon as [scheduler]pci_in_placement is enabled the nova-scheduler
will allocate PCI devices in placement. Then on the nova-compute side
the PCI claim will also allocate PCI devices in the nova DB. This test
creates a situation where the two allocations could contradict each
other and observes that in such a situation the PCI claim fails
instead of allocating a device that is not allocated in placement.
For the contradiction to happen we need two PCI devices that look
different from the placement perspective than from the nova DB
perspective. We can do that by assigning different traits to them in
placement and giving them different product_ids in the nova DB. Then
we create a request that, from the placement perspective, matches only
one of the devices, while from the nova DB perspective it matches only
the other device. We then expect that the boot request fails with no
valid host.
"""
# The fake libvirt will emulate on the host:
# * one type-PCI in slot 0
# * one type-PF in slot 1
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=1, num_pfs=1, num_vfs=0)
# we allow both devices to be consumed, but we assign different traits
# so we can selectively schedule to one of the devices in placement
device_spec = self._to_device_spec_conf(
[
{
"address": "0000:81:00.0",
"resource_class": "MY_DEV",
"traits": "A_PCI",
},
{
"address": "0000:81:01.0",
"resource_class": "MY_DEV",
"traits": "A_PF",
},
]
)
self.flags(group='pci', device_spec=device_spec)
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assertPCIDeviceCounts("compute1", total=2, free=2)
compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {"CUSTOM_MY_DEV": 1},
"0000:81:01.0": {"CUSTOM_MY_DEV": 1},
},
"traits": {
"0000:81:00.0": [
"CUSTOM_A_PCI",
],
"0000:81:01.0": [
"CUSTOM_A_PF",
],
},
"usages": {
"0000:81:00.0": {"CUSTOM_MY_DEV": 0},
"0000:81:01.0": {"CUSTOM_MY_DEV": 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# now create a PCI alias that cannot be fulfilled from both the nova
# and the placement perspective at the same time, but can be fulfilled
# from each perspective individually
pci_alias_no_match = {
"resource_class": "MY_DEV",
# by product_id this matches 81.00 only
"product_id": fakelibvirt.PCI_PROD_ID,
# by trait this matches 81.01 only
"traits": "A_PF",
"name": "a-pci",
}
self.flags(
group="pci",
alias=self._to_device_spec_conf([pci_alias_no_match]),
)
# then try to boot with the alias and expect no valid host error
extra_spec = {"pci_passthrough:alias": "a-pci:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(
flavor_id=flavor_id, networks=[], expected_state='ERROR')
self.assertIn('fault', server)
self.assertIn('No valid host', server['fault']['message'])
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")
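To make the contradiction concrete, here is the matching matrix as data; a minimal illustrative sketch, assuming fakelibvirt's stock PCI_PROD_ID and PF_PROD_ID values:
# what the alias requires vs. what each device offers (illustrative)
alias_needs = {"product_id": fakelibvirt.PCI_PROD_ID, "trait": "CUSTOM_A_PF"}
dev_81_00 = {"product_id": fakelibvirt.PCI_PROD_ID, "trait": "CUSTOM_A_PCI"}
dev_81_01 = {"product_id": fakelibvirt.PF_PROD_ID, "trait": "CUSTOM_A_PF"}
# nova DB matches by product_id (only 81.00) while placement matches
# by trait (only 81.01), so no single device satisfies both views and
# the request fails with no valid host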
def test_vf_with_split_allocation(self):
# The fake libvirt will emulate on the host:
# * two type-PFs in slots 0 and 1, with 2 VFs each
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=2, num_vfs=4)
# make all 4 VFs available
device_spec = self._to_device_spec_conf(
[
{
"product_id": fakelibvirt.VF_PROD_ID,
"resource_class": "MY_VF",
"traits": "blue",
},
]
)
self.flags(group='pci', device_spec=device_spec)
self.start_compute(hostname="compute1", pci_info=pci_info)
compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {"CUSTOM_MY_VF": 2},
"0000:81:01.0": {"CUSTOM_MY_VF": 2},
},
"traits": {
"0000:81:00.0": [
"CUSTOM_BLUE",
],
"0000:81:01.0": [
"CUSTOM_BLUE",
],
},
"usages": {
"0000:81:00.0": {"CUSTOM_MY_VF": 0},
"0000:81:01.0": {"CUSTOM_MY_VF": 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assertPCIDeviceCounts('compute1', total=4, free=4)
pci_alias_vf = {
"resource_class": "MY_VF",
"traits": "blue",
"name": "a-vf",
}
self.flags(
group="pci",
# FIXME(gibi): make _to_device_spec_conf a general util for both
# device spec and pci alias
alias=self._to_device_spec_conf([pci_alias_vf]),
)
# reserve VFs from 81.01 in placement to drive the first instance to
# 81.00
self._reserve_placement_resource(
"compute1_0000:81:01.0", "CUSTOM_MY_VF", 2)
# boot an instance with a single VF; we expect that it is allocated
# from 81.00 as both VFs on 81.01 are reserved
extra_spec = {"pci_passthrough:alias": "a-vf:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server_1vf = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts('compute1', total=4, free=3)
compute1_expected_placement_view["usages"] = {
"0000:81:00.0": {"CUSTOM_MY_VF": 1}
}
compute1_expected_placement_view["allocations"][server_1vf["id"]] = {
"0000:81:00.0": {"CUSTOM_MY_VF": 1},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")
# Boot a second instance requesting two VFs and ensure that the only
# way placement allows this is to split the two VFs between the PFs.
# Lower the reservation on 81.01 to one so the only viable placement
# candidate is one VF from 81.00 and one VF from 81.01.
self._reserve_placement_resource(
"compute1_0000:81:01.0", "CUSTOM_MY_VF", 1)
extra_spec = {"pci_passthrough:alias": "a-vf:2"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server_2vf = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts('compute1', total=4, free=1)
compute1_expected_placement_view["usages"] = {
# each VM uses one VF from 81.00
"0000:81:00.0": {"CUSTOM_MY_VF": 2},
"0000:81:01.0": {"CUSTOM_MY_VF": 1},
}
compute1_expected_placement_view["allocations"][server_2vf["id"]] = {
"0000:81:00.0": {"CUSTOM_MY_VF": 1},
"0000:81:01.0": {"CUSTOM_MY_VF": 1},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self.assert_no_pci_healing("compute1")
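Why placement is forced to split the second request, as a quick capacity check using the numbers from this test:
# free CUSTOM_MY_VF capacity when the a-vf:2 request arrives
free_81_00 = 2 - 1  # total 2, one consumed by server_1vf
free_81_01 = 2 - 1  # total 2, one unit still reserved
# a request for two VFs can only be satisfied as a 1+1 split
assert free_81_00 + free_81_01 == 2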
@@ -95,6 +95,8 @@ class _PCIServersTestBase(base.ServersTestBase):
ADDITIONAL_FILTERS = ['NUMATopologyFilter', 'PciPassthroughFilter']
PCI_RC = f"CUSTOM_PCI_{fakelibvirt.PCI_VEND_ID}_{fakelibvirt.PCI_PROD_ID}"
def setUp(self):
self.ctxt = context.get_admin_context()
self.flags(
@@ -240,6 +242,10 @@ class _PCIServersTestBase(base.ServersTestBase):
f"{actual_rp_allocs} instead."
)
@staticmethod
def _to_device_spec_conf(spec_list):
return [jsonutils.dumps(x) for x in spec_list]
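The FIXME notes in the tests above ask for this helper to serve the [pci]alias option as well; a possible generalized shape (the name _to_list_of_json_str is an assumption, not part of this patch):
@staticmethod
def _to_list_of_json_str(list_of_dicts):
    # both [pci]device_spec and [pci]alias are multi-valued config
    # options holding JSON-encoded dicts, so one helper fits both
    return [jsonutils.dumps(x) for x in list_of_dicts]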
class _PCIServersWithMigrationTestBase(_PCIServersTestBase):
@@ -1889,7 +1895,6 @@ class PCIServersTest(_PCIServersTestBase):
'name': ALIAS_NAME,
}
)]
PCI_RC = f"CUSTOM_PCI_{fakelibvirt.PCI_VEND_ID}_{fakelibvirt.PCI_PROD_ID}"
def setUp(self):
super().setUp()
@@ -2317,6 +2322,19 @@ class PCIServersWithPreferredNUMATest(_PCIServersTestBase):
)]
expected_state = 'ACTIVE'
def setUp(self):
super().setUp()
self.flags(group="pci", report_in_placement=True)
# TODO(gibi): replace this with setting the [scheduler]pci_in_placement
# config to True once that config is added
self.mock_pci_in_placement_enabled = self.useFixture(
fixtures.MockPatch(
'nova.objects.request_spec.RequestSpec.'
'_pci_in_placement_enabled',
return_value=True
)
).mock
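Once that option lands, the TODO above suggests the mock collapses to a single flag; a hypothetical sketch, since the [scheduler]pci_in_placement option does not exist yet:
# hypothetical: enable the prefilter via config instead of mocking
self.flags(group="scheduler", pci_in_placement=True)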
def test_create_server_with_pci_dev_and_numa(self):
"""Validate behavior of 'preferred' PCI NUMA policy.
@@ -2329,6 +2347,20 @@ class PCIServersWithPreferredNUMATest(_PCIServersTestBase):
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=0)
self.start_compute(pci_info=pci_info)
compute1_placement_pci_view = {
"inventories": {
"0000:81:00.0": {self.PCI_RC: 1},
},
"traits": {
"0000:81:00.0": [],
},
"usages": {
"0000:81:00.0": {self.PCI_RC: 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_placement_pci_view)
# boot one instance with no PCI device to "fill up" NUMA node 0
extra_spec = {
@@ -2337,13 +2369,26 @@ class PCIServersWithPreferredNUMATest(_PCIServersTestBase):
flavor_id = self._create_flavor(vcpu=4, extra_spec=extra_spec)
self._create_server(flavor_id=flavor_id)
self.assert_placement_pci_view(
"compute1", **compute1_placement_pci_view)
# now boot one with a PCI device, which should succeed thanks to the
# use of the PCI policy
extra_spec['pci_passthrough:alias'] = '%s:1' % self.ALIAS_NAME
flavor_id = self._create_flavor(extra_spec=extra_spec)
server_with_pci = self._create_server(
flavor_id=flavor_id, expected_state=self.expected_state)
if self.expected_state == 'ACTIVE':
compute1_placement_pci_view["usages"][
"0000:81:00.0"][self.PCI_RC] = 1
compute1_placement_pci_view["allocations"][
server_with_pci['id']] = {"0000:81:00.0": {self.PCI_RC: 1}}
self.assert_placement_pci_view(
"compute1", **compute1_placement_pci_view)
self.assert_no_pci_healing("compute1")
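For contrast with the REQUIRED policy exercised below, a sketch of a 'preferred' alias in the style of the pci_alias built in the NUMA conflict test; the exact alias of this class is elided above, so treat this as illustrative:
pci_alias_preferred = {
    "resource_class": self.PCI_RC,
    "name": "pci-dev",
    # 'preferred' lets the scheduler fall back to a PCI device on a
    # different NUMA node instead of failing with no valid host
    "numa_policy": fields.PCINUMAAffinityPolicy.PREFERRED,
}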
class PCIServersWithRequiredNUMATest(PCIServersWithPreferredNUMATest):
@@ -2359,6 +2404,101 @@ class PCIServersWithRequiredNUMATest(PCIServersWithPreferredNUMATest):
)]
expected_state = 'ERROR'
def setUp(self):
super().setUp()
self.useFixture(
fixtures.MockPatch(
'nova.pci.utils.is_physical_function', return_value=False
)
)
def test_create_server_with_pci_dev_and_numa_placement_conflict(self):
# fakelibvirt will simulate the devices:
# * one type-PCI in 81.00 on numa 0
# * one type-PCI in 81.01 on numa 1
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=2)
# the device_spec assigns different traits to 81.00 and 81.01 so the
# two devices become distinguishable from the placement perspective
device_spec = self._to_device_spec_conf(
[
{
'vendor_id': fakelibvirt.PCI_VEND_ID,
'product_id': fakelibvirt.PCI_PROD_ID,
"address": "0000:81:00.0",
"traits": "green",
},
{
'vendor_id': fakelibvirt.PCI_VEND_ID,
'product_id': fakelibvirt.PCI_PROD_ID,
"address": "0000:81:01.0",
"traits": "red",
},
]
)
self.flags(group='pci', device_spec=device_spec)
# both numa 0 and numa 1 have 4 PCPUs
self.flags(cpu_dedicated_set='0-7', group='compute')
self.start_compute(pci_info=pci_info)
compute1_placement_pci_view = {
"inventories": {
"0000:81:00.0": {self.PCI_RC: 1},
"0000:81:01.0": {self.PCI_RC: 1},
},
"traits": {
"0000:81:00.0": ["CUSTOM_GREEN"],
"0000:81:01.0": ["CUSTOM_RED"],
},
"usages": {
"0000:81:00.0": {self.PCI_RC: 0},
"0000:81:01.0": {self.PCI_RC: 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_placement_pci_view)
# boot one instance with no PCI device to "fill up" the dedicated
# PCPUs of NUMA node 0, so pinned CPUs remain only on numa 1 while
# PCI devices exist on both nodes
extra_spec = {
'hw:cpu_policy': 'dedicated',
}
flavor_id = self._create_flavor(vcpu=4, extra_spec=extra_spec)
self._create_server(flavor_id=flavor_id)
pci_alias = {
"resource_class": self.PCI_RC,
# by the trait request only 81.00 matches in placement, and that
# device is on numa 0
"traits": "green",
"name": "pci-dev",
# this forces the scheduler to only accept a solution where the
# PCI device is on the same numa node as the pinned CPUs
'numa_policy': fields.PCINUMAAffinityPolicy.REQUIRED,
}
self.flags(
group="pci",
# FIXME(gibi): make _to_device_spec_conf a general util for both
# device spec and pci alias
alias=self._to_device_spec_conf([pci_alias]),
)
# Ask for dedicated CPUs, which can only be fulfilled on numa 1, and
# ask for a PCI alias that can only be fulfilled on numa 0 due to the
# trait request. We expect this combination to make the scheduling
# fail.
extra_spec = {
"hw:cpu_policy": "dedicated",
"pci_passthrough:alias": "pci-dev:1",
}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(
flavor_id=flavor_id, expected_state="ERROR")
self.assertIn('fault', server)
self.assertIn('No valid host', server['fault']['message'])
self.assert_placement_pci_view(
"compute1", **compute1_placement_pci_view)
self.assert_no_pci_healing("compute1")
@ddt.ddt
class PCIServersWithSRIOVAffinityPoliciesTest(_PCIServersTestBase):