Support multiple allocations for vGPUs
Removing the TODO that only allows one VGPU allocation per instance. Now we no longer need to support the very old VGPU usage for the root provider, this is easy. Change-Id: I48d2b700049c81071710e37c05579239255c3539 Related-Bug: #1758086 Signed-off-by: Sylvain Bauza <sbauza@redhat.com>
This commit is contained in:
@@ -172,6 +172,39 @@ provided by compute nodes.
|
|||||||
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
$ openstack server create --flavor vgpu_1 --image cirros-0.3.5-x86_64-uec --wait test-vgpu
|
||||||
|
|
||||||
|
|
||||||
|
Ask for more than one vGPU per instance by the flavor
|
||||||
|
-----------------------------------------------------
|
||||||
|
|
||||||
|
.. versionchanged:: 33.0.0
|
||||||
|
|
||||||
|
We have an open bug report `bug 1758086`_ explaining that the nvidia driver
|
||||||
|
doesn't support more than one vGPU per instance (and per GPU resource - which
|
||||||
|
can be a physical GPU or a virtual function, see nvidia docs for more details).
|
||||||
|
In order to alleviate this problem, it is mandatory to require in the flavor
|
||||||
|
to have all the vGPUs spread between multiple GPU resource providers.
|
||||||
|
|
||||||
|
For example, you can request two groups of vGPUs this way:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ openstack flavor set vgpu_2 --property "resources1:VGPU=1" \
|
||||||
|
--property "resources2:VGPU=1" \
|
||||||
|
|
||||||
|
|
||||||
|
With SR-IOV GPUs (you may need to refer to nvidia documentation to know the
|
||||||
|
distinction), this will work without requiring further attributes as every
|
||||||
|
single VGPU Resource Provider only provides a single VGPU resource.
|
||||||
|
|
||||||
|
For non-SRIOV GPUs, you may require other properties in order to request
|
||||||
|
Placement to allocate a host with two distinct GPUs.
|
||||||
|
You may need to create distinct custom traits per GPU or custom resource
|
||||||
|
classes for explicitly telling in your flavor that you would want resources
|
||||||
|
from distinct entities, or you could use ``group_policy=isolate`` as a property
|
||||||
|
but you would need to make sure that you don't ask for other resources but
|
||||||
|
virtual GPUs in your flavor or Placement would shard all the allocations for
|
||||||
|
*all* resource groups.
|
||||||
|
|
||||||
|
|
||||||
How to discover a GPU type
|
How to discover a GPU type
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
@@ -490,6 +523,7 @@ For nested vGPUs:
|
|||||||
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
|
||||||
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
|
||||||
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
|
||||||
|
.. _bug 1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
|
|
||||||
.. Links
|
.. Links
|
||||||
.. _Intel GVT-g: https://01.org/igvt-g
|
.. _Intel GVT-g: https://01.org/igvt-g
|
||||||
|
|||||||
@@ -321,10 +321,16 @@ class VGPUTests(VGPUTestBase):
|
|||||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||||
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
flavor_id=flavor, networks='auto', host=self.compute1.host)
|
||||||
|
|
||||||
# FIXME(sbauza): Unfortunately, we only accept one allocation per
|
# Eventually, we have two allocations and two mdevs
|
||||||
# instance by the libvirt driver as you can see in _allocate_mdevs().
|
self.assert_mdev_usage(self.compute1, expected_amount=2)
|
||||||
# So, eventually, we only have one vGPU for this instance.
|
# Let's verify those are spread between both GPU RPs
|
||||||
self.assert_mdev_usage(self.compute1, expected_amount=1)
|
rp_uuid = self.compute_rp_uuids['host1']
|
||||||
|
rp_uuids = self._get_all_rp_uuids_in_a_tree(rp_uuid)
|
||||||
|
for rp in rp_uuids:
|
||||||
|
inventory = self._get_provider_inventory(rp)
|
||||||
|
if orc.VGPU in inventory:
|
||||||
|
usage = self._get_provider_usages(rp)
|
||||||
|
self.assertEqual(1, usage[orc.VGPU])
|
||||||
|
|
||||||
|
|
||||||
class VGPUMultipleTypesTests(VGPUTestBase):
|
class VGPUMultipleTypesTests(VGPUTestBase):
|
||||||
|
|||||||
@@ -28547,13 +28547,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
self.assertIsNone(drvr._allocate_mdevs(allocations=allocations))
|
self.assertEqual([], drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
|
||||||
def _get_fake_provider_tree_with_vgpu(self):
|
def _get_fake_provider_tree_with_vgpu(self):
|
||||||
"""Returns a fake ProviderTree with VGPU inventory on two children RPs
|
"""Returns a fake ProviderTree with VGPU inventory on 3 children RPs
|
||||||
with one with a correct name and the other one wrong.
|
with the first two with a correct name and the third wrong.
|
||||||
|
|
||||||
The child provider is named rp1 and its UUID is uuids.rp1.
|
The child providers are named rp[1-3] and their UUIDs are uuids.rp1,
|
||||||
|
uuids.rp2 and uuids.rp3
|
||||||
"""
|
"""
|
||||||
cn_rp = dict(
|
cn_rp = dict(
|
||||||
uuid=uuids.cn,
|
uuid=uuids.cn,
|
||||||
@@ -28573,10 +28574,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_06_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp1, generation=0)
|
uuid=uuids.rp1, generation=0)
|
||||||
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp1, vgpu_rp_inv)
|
||||||
# Create a second child with a bad naming convention
|
# Create a second child also with a correct naming attribute
|
||||||
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
pt.new_child(cn_rp['name'] + '_' + 'pci_0000_07_00_0', cn_rp['uuid'],
|
||||||
uuid=uuids.rp2, generation=0)
|
uuid=uuids.rp2, generation=0)
|
||||||
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
pt.update_inventory(uuids.rp2, vgpu_rp_inv)
|
||||||
|
# Create a third child with a bad naming convention
|
||||||
|
pt.new_child('oops_I_did_it_again', cn_rp['uuid'],
|
||||||
|
uuid=uuids.rp3, generation=0)
|
||||||
|
pt.update_inventory(uuids.rp3, vgpu_rp_inv)
|
||||||
return pt
|
return pt
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -28603,6 +28608,37 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
get_unassigned_mdevs.assert_called_once_with('pci_0000_06_00_0',
|
||||||
['nvidia-11'])
|
['nvidia-11'])
|
||||||
|
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_existing_mdevs_not_assigned')
|
||||||
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
|
'_get_supported_mdev_resource_classes')
|
||||||
|
def test_allocate_mdevs_with_multiple_allocs(self, get_supported_mdev_rcs,
|
||||||
|
get_unassigned_mdevs):
|
||||||
|
self.flags(enabled_mdev_types=['nvidia-11'], group='devices')
|
||||||
|
allocations = {
|
||||||
|
uuids.rp1: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
uuids.rp2: {
|
||||||
|
'resources': {
|
||||||
|
orc.VGPU: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
get_supported_mdev_rcs.return_value = set([orc.VGPU])
|
||||||
|
get_unassigned_mdevs.side_effect = (set([uuids.mdev1]),
|
||||||
|
set([uuids.mdev2]))
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
# Mock the fact update_provider_tree() should have run
|
||||||
|
drvr.provider_tree = self._get_fake_provider_tree_with_vgpu()
|
||||||
|
self.assertEqual([uuids.mdev1, uuids.mdev2],
|
||||||
|
drvr._allocate_mdevs(allocations=allocations))
|
||||||
|
get_unassigned_mdevs.assert_has_calls(
|
||||||
|
[mock.call('pci_0000_06_00_0', ['nvidia-11']),
|
||||||
|
mock.call('pci_0000_07_00_0', ['nvidia-11'])])
|
||||||
|
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
'_get_mdev_capable_devices')
|
'_get_mdev_capable_devices')
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
@mock.patch.object(libvirt_driver.LibvirtDriver,
|
||||||
@@ -28663,7 +28699,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
|
|
||||||
# Test that we were unable to guess the RP name
|
# Test that we were unable to guess the RP name
|
||||||
allocations = {
|
allocations = {
|
||||||
uuids.rp2: {
|
uuids.rp3: {
|
||||||
'resources': {
|
'resources': {
|
||||||
orc.VGPU: 1,
|
orc.VGPU: 1,
|
||||||
}
|
}
|
||||||
|
|||||||
+49
-52
@@ -9060,43 +9060,39 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
That code is supporting Placement API version 1.12
|
That code is supporting Placement API version 1.12
|
||||||
"""
|
"""
|
||||||
vgpu_allocations = self._vgpu_allocations(allocations)
|
vgpu_allocations = self._vgpu_allocations(allocations)
|
||||||
if not vgpu_allocations:
|
|
||||||
return
|
|
||||||
# TODO(sbauza): For the moment, we only support allocations for only
|
|
||||||
# one pGPU.
|
|
||||||
if len(vgpu_allocations) > 1:
|
|
||||||
LOG.warning('More than one allocation was passed over to libvirt '
|
|
||||||
'while at the moment libvirt only supports one. Only '
|
|
||||||
'the first allocation will be looked up.')
|
|
||||||
rp_uuid, alloc = next(iter(vgpu_allocations.items()))
|
|
||||||
# We only have one allocation with a supported resource class
|
|
||||||
vgpus_asked = list(alloc['resources'].values())[0]
|
|
||||||
|
|
||||||
# Find if we allocated against a specific pGPU (and then the allocation
|
chosen_mdevs = []
|
||||||
# is made against a child RP) or any pGPU (in case the VGPU inventory
|
for rp_uuid, alloc in vgpu_allocations.items():
|
||||||
# is still on the root RP)
|
# We only have one allocation with a supported resource class
|
||||||
try:
|
# FIXME(sbauza): If a new vfio-mdev usage supports more than one
|
||||||
allocated_rp = self.provider_tree.data(rp_uuid)
|
# type per PCI device, we would need to modify this. For the
|
||||||
except ValueError:
|
# moment, all of the vfio-mdev drivers that we know only support
|
||||||
# The provider doesn't exist, return a better understandable
|
# one type per mdev-supported device.
|
||||||
# exception
|
vgpus_asked = list(alloc['resources'].values())[0]
|
||||||
raise exception.ComputeResourcesUnavailable(
|
|
||||||
reason='mdev-capable resource is not available')
|
try:
|
||||||
# FIXME(sbauza): The functional reshape test assumes that we could
|
allocated_rp = self.provider_tree.data(rp_uuid)
|
||||||
# run _allocate_mdevs() against non-nested RPs but this is impossible
|
except ValueError:
|
||||||
# as all inventories have been reshaped *before now* since it's done
|
# The provider doesn't exist, return a better understandable
|
||||||
# on init_host() (when the compute restarts or whatever else calls it).
|
# exception
|
||||||
# That said, since fixing the functional test isn't easy yet, let's
|
raise exception.ComputeResourcesUnavailable(
|
||||||
# assume we still support a non-nested RP for now.
|
reason='Resource Provider %s is missing' % rp_uuid)
|
||||||
if allocated_rp.parent_uuid is None:
|
|
||||||
# We are on a root RP
|
|
||||||
parent_device = None
|
|
||||||
else:
|
|
||||||
rp_name = allocated_rp.name
|
rp_name = allocated_rp.name
|
||||||
# There can be multiple roots, we need to find the root name
|
# There can be multiple roots, we need to find the root name
|
||||||
# to guess the physical device name
|
# to guess the physical device name
|
||||||
roots = list(self.provider_tree.roots)
|
roots = list(self.provider_tree.roots)
|
||||||
for root in roots:
|
for root in roots:
|
||||||
|
# FIXME(sbauza): The functional reshape test assumes that we
|
||||||
|
# could run _allocate_mdevs() against non-nested RPs but this
|
||||||
|
# is impossible as all inventories have been reshaped *before
|
||||||
|
# now* since it's done on init_host() (when the compute
|
||||||
|
# restarts or whatever else calls it). That said, since fixing
|
||||||
|
# the functional test isn't easy yet, let's assume we still
|
||||||
|
# support a non-nested RP for now.
|
||||||
|
if allocated_rp.parent_uuid is None:
|
||||||
|
# We are on a root RP
|
||||||
|
parent_device = None
|
||||||
|
break
|
||||||
if rp_name.startswith(root.name + '_'):
|
if rp_name.startswith(root.name + '_'):
|
||||||
# The RP name convention is :
|
# The RP name convention is :
|
||||||
# root_name + '_' + parent_device
|
# root_name + '_' + parent_device
|
||||||
@@ -9113,28 +9109,29 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
raise exception.ComputeResourcesUnavailable(
|
raise exception.ComputeResourcesUnavailable(
|
||||||
reason='mdev-capable resource is not available')
|
reason='mdev-capable resource is not available')
|
||||||
|
|
||||||
supported_types = self.supported_vgpu_types
|
supported_types = self.supported_vgpu_types
|
||||||
# Which mediated devices are created but not assigned to a guest ?
|
# Which mediated devices are created but not assigned to a guest ?
|
||||||
mdevs_available = self._get_existing_mdevs_not_assigned(
|
mdevs_available = self._get_existing_mdevs_not_assigned(
|
||||||
parent_device, supported_types)
|
parent_device, supported_types)
|
||||||
|
|
||||||
chosen_mdevs = []
|
for c in range(vgpus_asked):
|
||||||
for c in range(vgpus_asked):
|
chosen_mdev = None
|
||||||
chosen_mdev = None
|
if mdevs_available:
|
||||||
if mdevs_available:
|
# Take the first available mdev
|
||||||
# Take the first available mdev
|
chosen_mdev = mdevs_available.pop()
|
||||||
chosen_mdev = mdevs_available.pop()
|
else:
|
||||||
else:
|
LOG.debug('No available mdevs where found. '
|
||||||
LOG.debug('No available mdevs where found. '
|
'Creating a new one...')
|
||||||
'Creating an new one...')
|
chosen_mdev = self._create_new_mediated_device(
|
||||||
chosen_mdev = self._create_new_mediated_device(parent_device)
|
parent_device)
|
||||||
if not chosen_mdev:
|
if not chosen_mdev:
|
||||||
# If we can't find devices having available VGPUs, just raise
|
# If we can't find devices having available VGPUs, just
|
||||||
raise exception.ComputeResourcesUnavailable(
|
# raise
|
||||||
reason='mdev-capable resource is not available')
|
raise exception.ComputeResourcesUnavailable(
|
||||||
else:
|
reason='mdev-capable resource is not available')
|
||||||
chosen_mdevs.append(chosen_mdev)
|
else:
|
||||||
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
chosen_mdevs.append(chosen_mdev)
|
||||||
|
LOG.info('Allocated mdev: %s.', chosen_mdev)
|
||||||
return chosen_mdevs
|
return chosen_mdevs
|
||||||
|
|
||||||
def _detach_mediated_devices(self, guest):
|
def _detach_mediated_devices(self, guest):
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
You can now request different resource groups in your flavor for VGPU or
|
||||||
|
generic mediated device custom resource classes. Previously, only the
|
||||||
|
first resource request group was honored. See `bug #1758086`_ for more
|
||||||
|
details.
|
||||||
|
|
||||||
|
.. _bug #1758086: https://bugs.launchpad.net/nova/+bug/1758086
|
||||||
Reference in New Issue
Block a user