Merge "Support multiple allocations for vGPUs"

This commit is contained in:
Zuul
2026-03-11 04:44:04 +00:00
committed by Gerrit Code Review
5 changed files with 145 additions and 63 deletions
+34
View File
@@ -172,6 +172,39 @@ provided by compute nodes.
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu $ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
Ask for more than one vGPU per instance by the flavor
-----------------------------------------------------
.. versionchanged:: 33.0.0
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
doesn't support more than one vGPU per instance (and per GPU resource - which
can be a physical GPU or a virtual function, see nvidia docs for more details).
In order to alleviate this problem, it is mandatory to require in the flavor
that all the vGPUs be spread between multiple GPU resource providers.
For example, you can request two groups of vGPUs this way:
.. code-block:: console
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
    --property "resources2:VGPU=1"
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
distinction), this will work without requiring further attributes as every
single VGPU Resource Provider only provides a single VGPU resource.
For non-SRIOV GPUs, you may need other properties in order to ask
Placement to allocate you a host with two distinct GPUs.
You may need to create distinct custom traits per GPU or custom resource
classes for explicitly telling in your flavor that you want resources
from distinct entities, or you could use ``group_policy=isolate`` as a property;
but then you would need to make sure that you don't ask for resources other
than virtual GPUs in your flavor, or Placement would shard the allocations for
*all* resource groups.
How to discover a GPU type How to discover a GPU type
-------------------------- --------------------------
@@ -490,6 +523,7 @@ For nested vGPUs:
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688 .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705 .. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html .. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
.. Links .. Links
.. _Intel GVT-g: https://01.org/igvt-g .. _Intel GVT-g: https://01.org/igvt-g
+10 -4
View File
@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=flavor, networks='auto', host=self.compute1.host) flavor_id=flavor, networks='auto', host=self.compute1.host)
# FIXME(sbauza): Unfortunately, we only accept one allocation per # Eventually, we have two allocations and two mdevs
# instance by the libvirt driver as you can see in _allocate_mdevs(). self.assert_mdev_usage(self.compute1, expected_amount=2)
# So, eventually, we only have one vGPU for this instance. # Let's verify those are spread between both GPU RPs
self.assert_mdev_usage(self.compute1, expected_amount=1) rp_uuid = self.compute_rp_uuids['host1']
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
for rp in rp_uuids:
inventory = self._get_provider_inventory(rp)
if orc.VGPU in inventory:
usage = self._get_provider_usages(rp)
self.assertEqual(1, usage[orc.VGPU])
class VGPUMultipleTypesTests(VGPUTestBase): class VGPUMultipleTypesTests(VGPUTestBase):
+43 -7
View File
@@ -29397,13 +29397,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
} }
} }
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations)) self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
def _get_fake_provider_tree_with_vgpu(self): def _get_fake_provider_tree_with_vgpu(self):
"""Returns a fake ProviderTree with VGPU inventory on two children RPs """Returns a fake ProviderTree with VGPU inventory on 3 children RPs
with one with a correct name and the other one wrong. with the first two with a correct name and the third wrong.
The child provider is named rp1 and its UUID is uuids.rp1. The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
uuids.rp2 and uuids.rp3
""" """
cn_rp = dict( cn_rp = dict(
uuid=uuids.cn, uuid=uuids.cn,
@@ -29423,10 +29424,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'], pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
uuid=uuids.rp1, generation=0) uuid=uuids.rp1, generation=0)
pt.update_inventory(uuids.rp1, vgpu_rp_inv) pt.update_inventory(uuids.rp1, vgpu_rp_inv)
# Create a second child with a bad naming convention # Create a second child also with a correct naming attribute
pt.new_child('oops_I_did_it_again', cn_rp['uuid'], pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
uuid=uuids.rp2, generation=0) uuid=uuids.rp2, generation=0)
pt.update_inventory(uuids.rp2, vgpu_rp_inv) pt.update_inventory(uuids.rp2, vgpu_rp_inv)
# Create a third child with a bad naming convention
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
uuid=uuids.rp3, generation=0)
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
return pt return pt
@mock.patch.object(libvirt_driver.LibvirtDriver, @mock.patch.object(libvirt_driver.LibvirtDriver,
@@ -29453,6 +29458,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0', get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
['nvidia-11']) ['nvidia-11'])
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_existing_mdevs_not_assigned')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_supported_mdev_resource_classes')
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
get_unassigned_mdevs):
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
allocations = {
uuids.rp1: {
'resources': {
orc.VGPU: 1,
}
},
uuids.rp2: {
'resources': {
orc.VGPU: 1,
}
}
}
get_supported_mdev_rcs.return_value = set([orc.VGPU])
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
set([uuids.mdev2]))
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
# Mock the fact update_provider_tree() should have run
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
self.assertEqual([uuids.mdev1, uuids.mdev2],
drvr._allocate_mdevs(allocations=allocations))
get_unassigned_mdevs.assert_has_calls(
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
@mock.patch.object(libvirt_driver.LibvirtDriver, @mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_mdev_capable_devices') '_get_mdev_capable_devices')
@mock.patch.object(libvirt_driver.LibvirtDriver, @mock.patch.object(libvirt_driver.LibvirtDriver,
@@ -29513,7 +29549,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# Test that we were unable to guess the RP name # Test that we were unable to guess the RP name
allocations = { allocations = {
uuids.rp2: { uuids.rp3: {
'resources': { 'resources': {
orc.VGPU: 1, orc.VGPU: 1,
} }
+49 -52
View File
@@ -9237,43 +9237,39 @@ class LibvirtDriver(driver.ComputeDriver):
That code is supporting Placement API version 1.12 That code is supporting Placement API version 1.12
""" """
vgpu_allocations = self._vgpu_allocations(allocations) vgpu_allocations = self._vgpu_allocations(allocations)
if not vgpu_allocations:
return
# TODO(sbauza): For the moment, we only support allocations for only
# one pGPU.
if len(vgpu_allocations) > 1:
LOG.warning('More than one allocation was passed over to libvirt '
'while at the moment libvirt only supports one. Only '
'the first allocation will be looked up.')
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
# We only have one allocation with a supported resource class
vgpus_asked = list(alloc['resources'].values())[0]
# Find if we allocated against a specific pGPU (and then the allocation chosen_mdevs = []
# is made against a child RP) or any pGPU (in case the VGPU inventory for rp_uuid, alloc in vgpu_allocations.items():
# is still on the root RP) # We only have one allocation with a supported resource class
try: # FIXME(sbauza): If a new vfio-mdev usage supports more than one
allocated_rp = self.provider_tree.data(rp_uuid) # type per PCI device, we would need to modify this. For the
except ValueError: # moment, all of the vfio-mdev drivers that we know only support
# The provider doesn't exist, return a better understandable # one type per mdev-supported device.
# exception vgpus_asked = list(alloc['resources'].values())[0]
raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available') try:
# FIXME(sbauza): The functional reshape test assumes that we could allocated_rp = self.provider_tree.data(rp_uuid)
# run _allocate_mdevs() against non-nested RPs but this is impossible except ValueError:
# as all inventories have been reshaped *before now* since it's done # The provider doesn't exist, return a better understandable
# on init_host() (when the compute restarts or whatever else calls it). # exception
# That said, since fixing the functional test isn't easy yet, let's raise exception.ComputeResourcesUnavailable(
# assume we still support a non-nested RP for now. reason='Resource Provider %s is missing' % rp_uuid)
if allocated_rp.parent_uuid is None:
# We are on a root RP
parent_device = None
else:
rp_name = allocated_rp.name rp_name = allocated_rp.name
# There can be multiple roots, we need to find the root name # There can be multiple roots, we need to find the root name
# to guess the physical device name # to guess the physical device name
roots = list(self.provider_tree.roots) roots = list(self.provider_tree.roots)
for root in roots: for root in roots:
# FIXME(sbauza): The functional reshape test assumes that we
# could run _allocate_mdevs() against non-nested RPs but this
# is impossible as all inventories have been reshaped *before
# now* since it's done on init_host() (when the compute
# restarts or whatever else calls it). That said, since fixing
# the functional test isn't easy yet, let's assume we still
# support a non-nested RP for now.
if allocated_rp.parent_uuid is None:
# We are on a root RP
parent_device = None
break
if rp_name.startswith(root.name + '_'): if rp_name.startswith(root.name + '_'):
# The RP name convention is : # The RP name convention is :
# root_name + '_' + parent_device # root_name + '_' + parent_device
@@ -9290,28 +9286,29 @@ class LibvirtDriver(driver.ComputeDriver):
raise exception.ComputeResourcesUnavailable( raise exception.ComputeResourcesUnavailable(
reason='mdev-capable resource is not available') reason='mdev-capable resource is not available')
supported_types = self.supported_vgpu_types supported_types = self.supported_vgpu_types
# Which mediated devices are created but not assigned to a guest ? # Which mediated devices are created but not assigned to a guest ?
mdevs_available = self._get_existing_mdevs_not_assigned( mdevs_available = self._get_existing_mdevs_not_assigned(
parent_device, supported_types) parent_device, supported_types)
chosen_mdevs = [] for c in range(vgpus_asked):
for c in range(vgpus_asked): chosen_mdev = None
chosen_mdev = None if mdevs_available:
if mdevs_available: # Take the first available mdev
# Take the first available mdev chosen_mdev = mdevs_available.pop()
chosen_mdev = mdevs_available.pop() else:
else: LOG.debug('No available mdevs where found. '
LOG.debug('No available mdevs where found. ' 'Creating a new one...')
'Creating an new one...') chosen_mdev = self._create_new_mediated_device(
chosen_mdev = self._create_new_mediated_device(parent_device) parent_device)
if not chosen_mdev: if not chosen_mdev:
# If we can't find devices having available VGPUs, just raise # If we can't find devices having available VGPUs, just
raise exception.ComputeResourcesUnavailable( # raise
reason='mdev-capable resource is not available') raise exception.ComputeResourcesUnavailable(
else: reason='mdev-capable resource is not available')
chosen_mdevs.append(chosen_mdev) else:
LOG.info('Allocated mdev: %s.', chosen_mdev) chosen_mdevs.append(chosen_mdev)
LOG.info('Allocated mdev: %s.', chosen_mdev)
return chosen_mdevs return chosen_mdevs
def _detach_mediated_devices(self, guest): def _detach_mediated_devices(self, guest):
@@ -0,0 +1,9 @@
---
fixes:
- |
You can now request different resource groups in your flavor for VGPU or
generic mediated device custom resource classes. Previously, only the
first resource request group was honored. See `bug #1758086`_ for more
details.
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086