Merge "Support multiple allocations for vGPUs"
This commit is contained in:
@@ -172,6 +172,39 @@ provided by compute nodes.
|
|||||||
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
||||||
|
|
||||||
|
|
||||||
|
Ask for more than one vGPU per instance by the flavor
|
||||||
|
-----------------------------------------------------
|
||||||
|
|
||||||
|
.. versionchanged:: 33.0.0
|
||||||
|
|
||||||
|
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
|
||||||
|
doesn't support more than one vGPU per instance (and per GPU resource - which
|
||||||
|
can be a physical GPU or a virtual function, see nvidia docs for more details).
|
||||||
|
In order to work around this limitation, the flavor must request the vGPUs so
|
||||||
|
that they are spread across multiple GPU resource providers.
|
||||||
|
|
||||||
|
For example, you can request two groups of vGPUs this way:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
|
||||||
|
--property "resources2:VGPU=1"
|
||||||
|
|
||||||
|
|
||||||
|
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
|
||||||
|
distinction), this will work without requiring further attributes as every
|
||||||
|
single VGPU Resource Provider only provides a single VGPU resource.
|
||||||
|
|
||||||
|
For non-SR-IOV GPUs, you may need additional properties in order to ask
|
||||||
|
Placement for a host with two distinct GPUs.
|
||||||
|
You may need to create distinct custom traits per GPU or custom resource
|
||||||
|
classes to explicitly state in your flavor that you want resources
|
||||||
|
from distinct entities, or you could use ``group_policy=isolate`` as a property
|
||||||
|
but you would need to make sure that you don't ask for any resources other than
|
||||||
|
virtual GPUs in your flavor, or Placement would shard all the allocations for
|
||||||
|
*all* resource groups.
|
||||||
|
|
||||||
|
|
||||||
How to discover a GPU type
|
How to discover a GPU type
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
@@ -490,6 +523,7 @@ For nested vGPUs:
|
|||||||
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
||||||
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
||||||
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
||||||
|
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
|
|
||||||
.. Links
|
.. Links
|
||||||
.. _Intel GVT-g: https://01.org/igvt-g
|
.. _Intel GVT-g: https://01.org/igvt-g
|
||||||
|
|||||||
@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
|
|||||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||||
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
||||||
|
|
||||||
# FIXME(sbauza): Unfortunately, we only accept one allocation per
|
# Eventually, we have two allocations and two mdevs
|
||||||
# instance by the libvirt driver as you can see in _allocate_mdevs().
|
self.assert_mdev_usage(self.compute1, expected_amount=2)
|
||||||
# So, eventually, we only have one vGPU for this instance.
|
# Let's verify those are spread between both GPU RPs
|
||||||
self.assert_mdev_usage(self.compute1, expected_amount=1)
|
rp_uuid = self.compute_rp_uuids['host1']
|
||||||
|
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
|
||||||
|
for rp in rp_uuids:
|
||||||
|
inventory = self._get_provider_inventory(rp)
|
||||||
|
if orc.VGPU in inventory:
|
||||||
|
usage = self._get_provider_usages(rp)
|
||||||
|
self.assertEqual(1, usage[orc.VGPU])
|
||||||
|
|
||||||
|
|
||||||
class VGPUMultipleTypesTests(VGPUTestBase):
|
class VGPUMultipleTypesTests(VGPUTestBase):
|
||||||
|
|||||||
@@ -29397,13 +29397,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations))
|
self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
|
||||||
def _get_fake_provider_tree_with_vgpu(self):
|
def _get_fake_provider_tree_with_vgpu(self):
|
||||||
"""Returns a fake ProviderTree with VGPU inventory on two children RPs
|
"""Returns a fake ProviderTree with VGPU inventory on 3 children RPs
|
||||||
with one with a correct name and the other one wrong.
|
with the first two with a correct name and the third wrong.
|
||||||
|
|
||||||
The child provider is named rp1 and its UUID is uuids.rp1.
|
The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
|
||||||
|
uuids.rp2 and uuids.rp3
|
||||||
"""
|
"""
|
||||||
cn_rp = dict(
|
cn_rp = dict(
|
||||||
uuid=uuids.cn,
|
uuid=uuids.cn,
|
||||||
@@ -29423,10 +29424,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp1, generation=0)
|
uuid=uuids.rp1, generation=0)
|
||||||
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
||||||
# Create a second child with a bad naming convention
|
# Create a second child also with a correct naming attribute
|
||||||
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp2, generation=0)
|
uuid=uuids.rp2, generation=0)
|
||||||
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
||||||
|
# Create a third child with a bad naming convention
|
||||||
|
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
||||||
|
uuid=uuids.rp3, generation=0)
|
||||||
|
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
|
||||||
return pt
|
return pt
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -29453,6 +29458,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
||||||
['nvidia-11'])
|
['nvidia-11'])
|
||||||
|
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_existing_mdevs_not_assigned')
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_supported_mdev_resource_classes')
|
||||||
|
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
|
||||||
|
get_unassigned_mdevs):
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
|
||||||
|
allocations = {
|
||||||
|
uuids.rp1: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
uuids.rp2: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
get_supported_mdev_rcs.return_value = set([orc.VGPU])
|
||||||
|
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
|
||||||
|
set([uuids.mdev2]))
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
# Mock the fact update_provider_tree() should have run
|
||||||
|
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
|
||||||
|
self.assertEqual([uuids.mdev1, uuids.mdev2],
|
||||||
|
drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
get_unassigned_mdevs.assert_has_calls(
|
||||||
|
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
|
||||||
|
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
'_get_mdev_capable_devices')
|
'_get_mdev_capable_devices')
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -29513,7 +29549,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
|
|
||||||
# Test that we were unable to guess the RP name
|
# Test that we were unable to guess the RP name
|
||||||
allocations = {
|
allocations = {
|
||||||
uuids.rp2: {
|
uuids.rp3: {
|
||||||
'resources': {
|
'resources': {
|
||||||
orc.VGPU: 1,
|
orc.VGPU: 1,
|
||||||
}
|
}
|
||||||
|
|||||||
+24
-27
@@ -9237,43 +9237,39 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
That code is supporting Placement API version 1.12
|
That code is supporting Placement API version 1.12
|
||||||
"""
|
"""
|
||||||
vgpu_allocations = self._vgpu_allocations(allocations)
|
vgpu_allocations = self._vgpu_allocations(allocations)
|
||||||
if not vgpu_allocations:
|
|
||||||
return
|
chosen_mdevs = []
|
||||||
# TODO(sbauza): For the moment, we only support allocations for only
|
for rp_uuid, alloc in vgpu_allocations.items():
|
||||||
# one pGPU.
|
|
||||||
if len(vgpu_allocations) > 1:
|
|
||||||
LOG.warning('More than one allocation was passed over to libvirt '
|
|
||||||
'while at the moment libvirt only supports one. Only '
|
|
||||||
'the first allocation will be looked up.')
|
|
||||||
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
|
|
||||||
# We only have one allocation with a supported resource class
|
# We only have one allocation with a supported resource class
|
||||||
|
# FIXME(sbauza): If a new vfio-mdev usage supports more than one
|
||||||
|
# type per PCI device, we would need to modify this. For the
|
||||||
|
# moment, all of the vfio-mdev drivers that we know only support
|
||||||
|
# one type per mdev-supported device.
|
||||||
vgpus_asked = list(alloc['resources'].values())[0]
|
vgpus_asked = list(alloc['resources'].values())[0]
|
||||||
|
|
||||||
# Find if we allocated against a specific pGPU (and then the allocation
|
|
||||||
# is made against a child RP) or any pGPU (in case the VGPU inventory
|
|
||||||
# is still on the root RP)
|
|
||||||
try:
|
try:
|
||||||
allocated_rp = self.provider_tree.data(rp_uuid)
|
allocated_rp = self.provider_tree.data(rp_uuid)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# The provider doesn't exist, return a better understandable
|
# The provider doesn't exist, return a better understandable
|
||||||
# exception
|
# exception
|
||||||
raise exception.ComputeResourcesUnavailable(
|
raise exception.ComputeResourcesUnavailable(
|
||||||
reason='mdev-capable resource is not available')
|
reason='Resource Provider %s is missing' % rp_uuid)
|
||||||
# FIXME(sbauza): The functional reshape test assumes that we could
|
|
||||||
# run _allocate_mdevs() against non-nested RPs but this is impossible
|
|
||||||
# as all inventories have been reshaped *before now* since it's done
|
|
||||||
# on init_host() (when the compute restarts or whatever else calls it).
|
|
||||||
# That said, since fixing the functional test isn't easy yet, let's
|
|
||||||
# assume we still support a non-nested RP for now.
|
|
||||||
if allocated_rp.parent_uuid is None:
|
|
||||||
# We are on a root RP
|
|
||||||
parent_device = None
|
|
||||||
else:
|
|
||||||
rp_name = allocated_rp.name
|
rp_name = allocated_rp.name
|
||||||
# There can be multiple roots, we need to find the root name
|
# There can be multiple roots, we need to find the root name
|
||||||
# to guess the physical device name
|
# to guess the physical device name
|
||||||
roots = list(self.provider_tree.roots)
|
roots = list(self.provider_tree.roots)
|
||||||
for root in roots:
|
for root in roots:
|
||||||
|
# FIXME(sbauza): The functional reshape test assumes that we
|
||||||
|
# could run _allocate_mdevs() against non-nested RPs but this
|
||||||
|
# is impossible as all inventories have been reshaped *before
|
||||||
|
# now* since it's done on init_host() (when the compute
|
||||||
|
# restarts or whatever else calls it). That said, since fixing
|
||||||
|
# the functional test isn't easy yet, let's assume we still
|
||||||
|
# support a non-nested RP for now.
|
||||||
|
if allocated_rp.parent_uuid is None:
|
||||||
|
# We are on a root RP
|
||||||
|
parent_device = None
|
||||||
|
break
|
||||||
if rp_name.startswith(root.name + '_'):
|
if rp_name.startswith(root.name + '_'):
|
||||||
# The RP name convention is :
|
# The RP name convention is :
|
||||||
# root_name + '_' + parent_device
|
# root_name + '_' + parent_device
|
||||||
@@ -9295,7 +9291,6 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
mdevs_available = self._get_existing_mdevs_not_assigned(
|
mdevs_available = self._get_existing_mdevs_not_assigned(
|
||||||
parent_device, supported_types)
|
parent_device, supported_types)
|
||||||
|
|
||||||
chosen_mdevs = []
|
|
||||||
for c in range(vgpus_asked):
|
for c in range(vgpus_asked):
|
||||||
chosen_mdev = None
|
chosen_mdev = None
|
||||||
if mdevs_available:
|
if mdevs_available:
|
||||||
@@ -9303,10 +9298,12 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
chosen_mdev = mdevs_available.pop()
|
chosen_mdev = mdevs_available.pop()
|
||||||
else:
|
else:
|
||||||
LOG.debug('No available mdevs where found. '
|
LOG.debug('No available mdevs where found. '
|
||||||
'Creating an new one...')
|
'Creating a new one...')
|
||||||
chosen_mdev = self._create_new_mediated_device(parent_device)
|
chosen_mdev = self._create_new_mediated_device(
|
||||||
|
parent_device)
|
||||||
if not chosen_mdev:
|
if not chosen_mdev:
|
||||||
# If we can't find devices having available VGPUs, just raise
|
# If we can't find devices having available VGPUs, just
|
||||||
|
# raise
|
||||||
raise exception.ComputeResourcesUnavailable(
|
raise exception.ComputeResourcesUnavailable(
|
||||||
reason='mdev-capable resource is not available')
|
reason='mdev-capable resource is not available')
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
You can now request different resource groups in your flavor for VGPU or
|
||||||
|
generic mediated device custom resource classes. Previously, only the
|
||||||
|
first resource request group was honored. See `bug #1758086`_ for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
Reference in New Issue
Block a user