Support multiple allocations for vGPUs
Removing the TODO that only allows one VGPU allocation per instance. Now we no longer need to support the very old VGPU usage for the root provider, this is easy. Change-Id: I48d2b700049c81071710e37c05579239255c3539 Related-Bug: #1758086 Signed-off-by: Sylvain Bauza <sbauza@redhat.com>
This commit is contained in:
@@ -172,6 +172,39 @@ provided by compute nodes.
|
|||||||
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
||||||
|
|
||||||
|
|
||||||
|
Ask for more than one vGPU per instance by the flavor
|
||||||
|
-----------------------------------------------------
|
||||||
|
|
||||||
|
.. versionchanged:: 33.0.0
|
||||||
|
|
||||||
|
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
|
||||||
|
doesn't support more than one vGPU per instance (and per GPU resource - which
|
||||||
|
can be a physical GPU or a virtual function, see nvidia docs for more details).
|
||||||
|
In order to alleviate this problem, it is mandatory to require in the flavor
|
||||||
|
to have all the vGPUs spread between multiple GPU resource providers.
|
||||||
|
|
||||||
|
For example, you can request two groups of vGPUs this way:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
|
||||||
|
--property "resources2:VGPU=1" \
|
||||||
|
|
||||||
|
|
||||||
|
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
|
||||||
|
distinction), this will work without requiring further attributes as every
|
||||||
|
single VGPU Resource Provider only provides a single VGPU resource.
|
||||||
|
|
||||||
|
For non-SRIOV GPUs, you may require other properties in order to request
|
||||||
|
Placement to allocate a host with two distinct GPUs.
|
||||||
|
You may need to create distinct custom traits per GPU or custom resource
|
||||||
|
classes for explicitly telling in your flavor that you would want resources
|
||||||
|
from distinct entities, or you could use ``group_policy=isolate`` as a property
|
||||||
|
but you would need to make sure that you don't ask for other resources but
|
||||||
|
virtual GPUs in your flavor or Placement would shard all the allocations for
|
||||||
|
*all* resource groups.
|
||||||
|
|
||||||
|
|
||||||
How to discover a GPU type
|
How to discover a GPU type
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
@@ -490,6 +523,7 @@ For nested vGPUs:
|
|||||||
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
||||||
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
||||||
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
||||||
|
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
|
|
||||||
.. Links
|
.. Links
|
||||||
.. _Intel GVT-g: https://01.org/igvt-g
|
.. _Intel GVT-g: https://01.org/igvt-g
|
||||||
|
|||||||
@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
|
|||||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||||
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
||||||
|
|
||||||
# FIXME(sbauza): Unfortunately, we only accept one allocation per
|
# Eventually, we have two allocations and two mdevs
|
||||||
# instance by the libvirt driver as you can see in _allocate_mdevs().
|
self.assert_mdev_usage(self.compute1, expected_amount=2)
|
||||||
# So, eventually, we only have one vGPU for this instance.
|
# Let's verify those are spread between both GPU RPs
|
||||||
self.assert_mdev_usage(self.compute1, expected_amount=1)
|
rp_uuid = self.compute_rp_uuids['host1']
|
||||||
|
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
|
||||||
|
for rp in rp_uuids:
|
||||||
|
inventory = self._get_provider_inventory(rp)
|
||||||
|
if orc.VGPU in inventory:
|
||||||
|
usage = self._get_provider_usages(rp)
|
||||||
|
self.assertEqual(1, usage[orc.VGPU])
|
||||||
|
|
||||||
|
|
||||||
class VGPUMultipleTypesTests(VGPUTestBase):
|
class VGPUMultipleTypesTests(VGPUTestBase):
|
||||||
|
|||||||
@@ -28547,13 +28547,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations))
|
self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
|
||||||
def _get_fake_provider_tree_with_vgpu(self):
|
def _get_fake_provider_tree_with_vgpu(self):
|
||||||
"""Returns a fake ProviderTree with VGPU inventory on two children RPs
|
"""Returns a fake ProviderTree with VGPU inventory on 3 children RPs
|
||||||
with one with a correct name and the other one wrong.
|
with the first two with a correct name and the third wrong.
|
||||||
|
|
||||||
The child provider is named rp1 and its UUID is uuids.rp1.
|
The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
|
||||||
|
uuids.rp2 and uuids.rp3
|
||||||
"""
|
"""
|
||||||
cn_rp = dict(
|
cn_rp = dict(
|
||||||
uuid=uuids.cn,
|
uuid=uuids.cn,
|
||||||
@@ -28573,10 +28574,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp1, generation=0)
|
uuid=uuids.rp1, generation=0)
|
||||||
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
||||||
# Create a second child with a bad naming convention
|
# Create a second child also with a correct naming attribute
|
||||||
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp2, generation=0)
|
uuid=uuids.rp2, generation=0)
|
||||||
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
||||||
|
# Create a third child with a bad naming convention
|
||||||
|
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
||||||
|
uuid=uuids.rp3, generation=0)
|
||||||
|
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
|
||||||
return pt
|
return pt
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -28603,6 +28608,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
||||||
['nvidia-11'])
|
['nvidia-11'])
|
||||||
|
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_existing_mdevs_not_assigned')
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_supported_mdev_resource_classes')
|
||||||
|
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
|
||||||
|
get_unassigned_mdevs):
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
|
||||||
|
allocations = {
|
||||||
|
uuids.rp1: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
uuids.rp2: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
get_supported_mdev_rcs.return_value = set([orc.VGPU])
|
||||||
|
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
|
||||||
|
set([uuids.mdev2]))
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
# Mock the fact update_provider_tree() should have run
|
||||||
|
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
|
||||||
|
self.assertEqual([uuids.mdev1, uuids.mdev2],
|
||||||
|
drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
get_unassigned_mdevs.assert_has_calls(
|
||||||
|
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
|
||||||
|
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
'_get_mdev_capable_devices')
|
'_get_mdev_capable_devices')
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -28663,7 +28699,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
|
|
||||||
# Test that we were unable to guess the RP name
|
# Test that we were unable to guess the RP name
|
||||||
allocations = {
|
allocations = {
|
||||||
uuids.rp2: {
|
uuids.rp3: {
|
||||||
'resources': {
|
'resources': {
|
||||||
orc.VGPU: 1,
|
orc.VGPU: 1,
|
||||||
}
|
}
|
||||||
|
|||||||
+49
-52
@@ -9060,43 +9060,39 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
That code is supporting Placement API version 1.12
|
That code is supporting Placement API version 1.12
|
||||||
"""
|
"""
|
||||||
vgpu_allocations = self._vgpu_allocations(allocations)
|
vgpu_allocations = self._vgpu_allocations(allocations)
|
||||||
if not vgpu_allocations:
|
|
||||||
return
|
|
||||||
# TODO(sbauza): For the moment, we only support allocations for only
|
|
||||||
# one pGPU.
|
|
||||||
if len(vgpu_allocations) > 1:
|
|
||||||
LOG.warning('More than one allocation was passed over to libvirt '
|
|
||||||
'while at the moment libvirt only supports one. Only '
|
|
||||||
'the first allocation will be looked up.')
|
|
||||||
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
|
|
||||||
# We only have one allocation with a supported resource class
|
|
||||||
vgpus_asked = list(alloc['resources'].values())[0]
|
|
||||||
|
|
||||||
# Find if we allocated against a specific pGPU (and then the allocation
|
chosen_mdevs = []
|
||||||
# is made against a child RP) or any pGPU (in case the VGPU inventory
|
for rp_uuid, alloc in vgpu_allocations.items():
|
||||||
# is still on the root RP)
|
# We only have one allocation with a supported resource class
|
||||||
try:
|
# FIXME(sbauza): If a new vfio-mdev usage supports more than one
|
||||||
allocated_rp = self.provider_tree.data(rp_uuid)
|
# type per PCI device, we would need to modify this. For the
|
||||||
except ValueError:
|
# moment, all of the vfio-mdev drivers that we know only support
|
||||||
# The provider doesn't exist, return a better understandable
|
# one type per mdev-supported device.
|
||||||
# exception
|
vgpus_asked = list(alloc['resources'].values())[0]
|
||||||
raise exception.ComputeResourcesUnavailable(
|
|
||||||
reason='mdev-capable resource is not available')
|
try:
|
||||||
# FIXME(sbauza): The functional reshape test assumes that we could
|
allocated_rp = self.provider_tree.data(rp_uuid)
|
||||||
# run _allocate_mdevs() against non-nested RPs but this is impossible
|
except ValueError:
|
||||||
# as all inventories have been reshaped *before now* since it's done
|
# The provider doesn't exist, return a better understandable
|
||||||
# on init_host() (when the compute restarts or whatever else calls it).
|
# exception
|
||||||
# That said, since fixing the functional test isn't easy yet, let's
|
raise exception.ComputeResourcesUnavailable(
|
||||||
# assume we still support a non-nested RP for now.
|
reason='Resource Provider %s is missing' % rp_uuid)
|
||||||
if allocated_rp.parent_uuid is None:
|
|
||||||
# We are on a root RP
|
|
||||||
parent_device = None
|
|
||||||
else:
|
|
||||||
rp_name = allocated_rp.name
|
rp_name = allocated_rp.name
|
||||||
# There can be multiple roots, we need to find the root name
|
# There can be multiple roots, we need to find the root name
|
||||||
# to guess the physical device name
|
# to guess the physical device name
|
||||||
roots = list(self.provider_tree.roots)
|
roots = list(self.provider_tree.roots)
|
||||||
for root in roots:
|
for root in roots:
|
||||||
|
# FIXME(sbauza): The functional reshape test assumes that we
|
||||||
|
# could run _allocate_mdevs() against non-nested RPs but this
|
||||||
|
# is impossible as all inventories have been reshaped *before
|
||||||
|
# now* since it's done on init_host() (when the compute
|
||||||
|
# restarts or whatever else calls it). That said, since fixing
|
||||||
|
# the functional test isn't easy yet, let's assume we still
|
||||||
|
# support a non-nested RP for now.
|
||||||
|
if allocated_rp.parent_uuid is None:
|
||||||
|
# We are on a root RP
|
||||||
|
parent_device = None
|
||||||
|
break
|
||||||
if rp_name.startswith(root.name + '_'):
|
if rp_name.startswith(root.name + '_'):
|
||||||
# The RP name convention is :
|
# The RP name convention is :
|
||||||
# root_name + '_' + parent_device
|
# root_name + '_' + parent_device
|
||||||
@@ -9113,28 +9109,29 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
raise exception.ComputeResourcesUnavailable(
|
raise exception.ComputeResourcesUnavailable(
|
||||||
reason='mdev-capable resource is not available')
|
reason='mdev-capable resource is not available')
|
||||||
|
|
||||||
supported_types = self.supported_vgpu_types
|
supported_types = self.supported_vgpu_types
|
||||||
# Which mediated devices are created but not assigned to a guest ?
|
# Which mediated devices are created but not assigned to a guest ?
|
||||||
mdevs_available = self._get_existing_mdevs_not_assigned(
|
mdevs_available = self._get_existing_mdevs_not_assigned(
|
||||||
parent_device, supported_types)
|
parent_device, supported_types)
|
||||||
|
|
||||||
chosen_mdevs = []
|
for c in range(vgpus_asked):
|
||||||
for c in range(vgpus_asked):
|
chosen_mdev = None
|
||||||
chosen_mdev = None
|
if mdevs_available:
|
||||||
if mdevs_available:
|
# Take the first available mdev
|
||||||
# Take the first available mdev
|
chosen_mdev = mdevs_available.pop()
|
||||||
chosen_mdev = mdevs_available.pop()
|
else:
|
||||||
else:
|
LOG.debug('No available mdevs where found. '
|
||||||
LOG.debug('No available mdevs where found. '
|
'Creating a new one...')
|
||||||
'Creating an new one...')
|
chosen_mdev = self._create_new_mediated_device(
|
||||||
chosen_mdev = self._create_new_mediated_device(parent_device)
|
parent_device)
|
||||||
if not chosen_mdev:
|
if not chosen_mdev:
|
||||||
# If we can't find devices having available VGPUs, just raise
|
# If we can't find devices having available VGPUs, just
|
||||||
raise exception.ComputeResourcesUnavailable(
|
# raise
|
||||||
reason='mdev-capable resource is not available')
|
raise exception.ComputeResourcesUnavailable(
|
||||||
else:
|
reason='mdev-capable resource is not available')
|
||||||
chosen_mdevs.append(chosen_mdev)
|
else:
|
||||||
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
chosen_mdevs.append(chosen_mdev)
|
||||||
|
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
||||||
return chosen_mdevs
|
return chosen_mdevs
|
||||||
|
|
||||||
def _detach_mediated_devices(self, guest):
|
def _detach_mediated_devices(self, guest):
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
You can now request different resource groups in your flavor for VGPU or
|
||||||
|
generic mediated device custom resource classes. Previously, only the
|
||||||
|
first resource request group was honored. See `bug #1758086`_ for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
Reference in New Issue
Block a user