Support "one-time-use" PCI devices
This adds support for devices that will be allocated to an instance once and then left in a reserved=total state. An external workflow can put them back into an allocatable state by dropping reserved back to zero.

Note that this requires PCI-in-placement tracking for the affected devices, and it is only valid for type-PCI and type-PF devices.

Related to blueprint one-time-use-devices
Depends-On: https://review.opendev.org/c/openstack/requirements/+/946181
Co-Authored-By: Balazs Gibizer <gibi@redhat.com>
Change-Id: Idfe8a746a97d68cd4eae30afb7d22f4e3af80327
This commit is contained in:
@@ -13,3 +13,4 @@ imigration
|
|||||||
childs
|
childs
|
||||||
assertin
|
assertin
|
||||||
notin
|
notin
|
||||||
|
OTU
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import typing as ty
|
|||||||
import os_resource_classes
|
import os_resource_classes
|
||||||
import os_traits
|
import os_traits
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
from oslo_utils import strutils
|
||||||
from oslo_utils import uuidutils
|
from oslo_utils import uuidutils
|
||||||
|
|
||||||
from nova.compute import provider_tree
|
from nova.compute import provider_tree
|
||||||
@@ -134,6 +135,7 @@ class PciResourceProvider:
|
|||||||
self.children_devs: ty.List[pci_device.PciDevice] = []
|
self.children_devs: ty.List[pci_device.PciDevice] = []
|
||||||
self.resource_class: ty.Optional[str] = None
|
self.resource_class: ty.Optional[str] = None
|
||||||
self.traits: ty.Optional[ty.Set[str]] = None
|
self.traits: ty.Optional[ty.Set[str]] = None
|
||||||
|
self.is_otu = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def devs(self) -> ty.List[pci_device.PciDevice]:
|
def devs(self) -> ty.List[pci_device.PciDevice]:
|
||||||
@@ -170,6 +172,12 @@ class PciResourceProvider:
|
|||||||
dev.address for dev in self.children_devs),
|
dev.address for dev in self.children_devs),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if 'one_time_use' in dev_spec_tags:
|
||||||
|
# Child devices cannot be OTU. Do not even tolerate setting =false
|
||||||
|
raise exception.PlacementPciException(
|
||||||
|
error=('Only type-PCI and type-PF devices may set '
|
||||||
|
'one_time_use and %s does not qualify') % self.name)
|
||||||
|
|
||||||
self.children_devs.append(dev)
|
self.children_devs.append(dev)
|
||||||
self.resource_class = rc
|
self.resource_class = rc
|
||||||
self.traits = traits
|
self.traits = traits
|
||||||
@@ -183,7 +191,17 @@ class PciResourceProvider:
|
|||||||
|
|
||||||
self.parent_dev = dev
|
self.parent_dev = dev
|
||||||
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
|
self.resource_class = _get_rc_for_dev(dev, dev_spec_tags)
|
||||||
self.traits = _get_traits_for_dev(dev_spec_tags)
|
self.is_otu = strutils.bool_from_string(
|
||||||
|
dev_spec_tags.get("one_time_use", "false"))
|
||||||
|
|
||||||
|
traits = _get_traits_for_dev(dev_spec_tags)
|
||||||
|
|
||||||
|
if self.is_otu:
|
||||||
|
# We always decorate OTU providers with a trait so they can be
|
||||||
|
# easily found
|
||||||
|
traits.add(os_traits.HW_PCI_ONE_TIME_USE)
|
||||||
|
|
||||||
|
self.traits = traits
|
||||||
|
|
||||||
def remove_child(self, dev: pci_device.PciDevice) -> None:
|
def remove_child(self, dev: pci_device.PciDevice) -> None:
|
||||||
# Nothing to do here. The update_provider_tree will handle the
|
# Nothing to do here. The update_provider_tree will handle the
|
||||||
@@ -215,6 +233,39 @@ class PciResourceProvider:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _get_inventories(self):
|
||||||
|
# NOTE(gibi): The rest of the inventory fields (allocation_ratio,
|
||||||
|
# etc.) are defaulted by placement and the default value makes
|
||||||
|
# sense for PCI devices, i.e. no overallocation and PCI can be
|
||||||
|
# allocated one by one. We may set the reserved value to a nonzero
|
||||||
|
# amount on the provider if the operator requests it via the
|
||||||
|
# one_time_use=true flag, but otherwise the operator controls
|
||||||
|
# reserved and nova will not override that value periodically.
|
||||||
|
inventory = {
|
||||||
|
"total": len(self.devs),
|
||||||
|
"max_unit": len(self.devs),
|
||||||
|
}
|
||||||
|
|
||||||
|
self._handle_one_time_use(inventory)
|
||||||
|
|
||||||
|
return {self.resource_class: inventory}
|
||||||
|
|
||||||
|
def _handle_one_time_use(self, inventory: dict):
|
||||||
|
"""Modifies the inventory to reserve the OTU device if allocated"""
|
||||||
|
|
||||||
|
def is_allocated(dev: pci_device.PciDevice) -> bool:
|
||||||
|
return 'instance_uuid' in dev and dev.instance_uuid
|
||||||
|
|
||||||
|
if self.parent_dev and self.is_otu and is_allocated(self.parent_dev):
|
||||||
|
# If we are an allocated parent device, and our one-time-use flag
|
||||||
|
# is set, we need to also set our inventory to reserved.
|
||||||
|
# NOTE(danms): VERY IMPORTANT: we never *ever* want to update
|
||||||
|
# reserved to anything other than len(self.devs), and definitely
|
||||||
|
# not if we are not allocated. These devices are intended to go
|
||||||
|
# from unallocated to allocated AND reserved. They may be
|
||||||
|
# unreserved by an external entity, but never nova.
|
||||||
|
inventory['reserved'] = len(self.devs)
|
||||||
|
|
||||||
def update_provider_tree(
|
def update_provider_tree(
|
||||||
self,
|
self,
|
||||||
provider_tree: provider_tree.ProviderTree,
|
provider_tree: provider_tree.ProviderTree,
|
||||||
@@ -245,19 +296,7 @@ class PciResourceProvider:
|
|||||||
|
|
||||||
provider_tree.update_inventory(
|
provider_tree.update_inventory(
|
||||||
self.name,
|
self.name,
|
||||||
# NOTE(gibi): The rest of the inventory fields (reserved,
|
self._get_inventories(),
|
||||||
# allocation_ratio, etc.) are defaulted by placement and the
|
|
||||||
# default value make sense for PCI devices, i.e. no overallocation
|
|
||||||
# and PCI can be allocated one by one.
|
|
||||||
# Also, this way if the operator sets reserved value in placement
|
|
||||||
# for the PCI inventories directly then nova will not override that
|
|
||||||
# value periodically.
|
|
||||||
{
|
|
||||||
self.resource_class: {
|
|
||||||
"total": len(self.devs),
|
|
||||||
"max_unit": len(self.devs),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
provider_tree.update_traits(self.name, self.traits)
|
provider_tree.update_traits(self.name, self.traits)
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import re
|
|||||||
import string
|
import string
|
||||||
import typing as ty
|
import typing as ty
|
||||||
|
|
||||||
|
import nova.conf
|
||||||
from nova import exception
|
from nova import exception
|
||||||
from nova.i18n import _
|
from nova.i18n import _
|
||||||
from nova import objects
|
from nova import objects
|
||||||
@@ -35,6 +36,7 @@ ANY = '*'
|
|||||||
REGEX_ANY = '.*'
|
REGEX_ANY = '.*'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
CONF = nova.conf.CONF
|
||||||
|
|
||||||
PCISpecAddressType = ty.Union[ty.Dict[str, str], str]
|
PCISpecAddressType = ty.Union[ty.Dict[str, str], str]
|
||||||
|
|
||||||
@@ -320,6 +322,15 @@ class PciDeviceSpec(PciAddressSpec):
|
|||||||
|
|
||||||
self._normalize_device_spec_tag("managed")
|
self._normalize_device_spec_tag("managed")
|
||||||
self._normalize_device_spec_tag("live_migratable")
|
self._normalize_device_spec_tag("live_migratable")
|
||||||
|
self._normalize_device_spec_tag("one_time_use")
|
||||||
|
|
||||||
|
if self.tags.get('one_time_use') == 'true':
|
||||||
|
# Validate that one_time_use=true is not set on devices where we
|
||||||
|
# cannot support proper reservation protection.
|
||||||
|
if not CONF.pci.report_in_placement:
|
||||||
|
raise exception.PciConfigInvalidSpec(
|
||||||
|
reason=_('one_time_use=true requires '
|
||||||
|
'pci.report_in_placement to be enabled'))
|
||||||
|
|
||||||
if self._remote_managed:
|
if self._remote_managed:
|
||||||
if address_obj is None:
|
if address_obj is None:
|
||||||
|
|||||||
@@ -270,6 +270,129 @@ class TestTranslator(test.NoDBTestCase):
|
|||||||
pt.data("fake-node_0000:72:00.0").uuid, pf.extra_info["rp_uuid"]
|
pt.data("fake-node_0000:72:00.0").uuid, pf.extra_info["rp_uuid"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_otu_decorates_with_trait(self):
|
||||||
|
pv = ppt.PlacementView(
|
||||||
|
"fake-node", instances_under_same_host_resize=[])
|
||||||
|
sd = pci_device.PciDevice(
|
||||||
|
address="0000:71:00.0",
|
||||||
|
parent_addr="0000:71:00.0",
|
||||||
|
dev_type=fields.PciDeviceType.STANDARD,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
pf1 = pci_device.PciDevice(
|
||||||
|
address="0000:72:00.0",
|
||||||
|
parent_addr=None,
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
pf2 = pci_device.PciDevice(
|
||||||
|
address="0000:73:00.0",
|
||||||
|
parent_addr=None,
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
pf3 = pci_device.PciDevice(
|
||||||
|
address="0000:74:00.0",
|
||||||
|
parent_addr=None,
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
vf1 = pci_device.PciDevice(
|
||||||
|
address="0000:75:00.0",
|
||||||
|
parent_addr="0000:75:00.0",
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_VF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
vf2 = pci_device.PciDevice(
|
||||||
|
address="0000:74:00.0",
|
||||||
|
parent_addr="0000:76:00.0",
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_VF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
|
||||||
|
pt = provider_tree.ProviderTree()
|
||||||
|
pt.new_root("fake-node", uuids.compute_rp)
|
||||||
|
|
||||||
|
# PF and regular devices are fine...
|
||||||
|
pv._add_dev(sd, {'one_time_use': 'true'})
|
||||||
|
pv._add_dev(pf1, {'one_time_use': 'true'})
|
||||||
|
pv._add_dev(pf2, {})
|
||||||
|
pv._add_dev(pf3, {'one_time_use': 'false'})
|
||||||
|
# ... but VFs are not allowed
|
||||||
|
self.assertRaisesRegex(exception.PlacementPciException,
|
||||||
|
'Only.*may set one_time_use',
|
||||||
|
pv._add_dev, vf1, {'one_time_use': 'true'})
|
||||||
|
self.assertRaisesRegex(exception.PlacementPciException,
|
||||||
|
'Only.*may set one_time_use',
|
||||||
|
pv._add_dev, vf2, {'one_time_use': 'false'})
|
||||||
|
pv.update_provider_tree(pt)
|
||||||
|
|
||||||
|
# These are both OTU, make sure we get the trait added
|
||||||
|
self.assertIn('HW_PCI_ONE_TIME_USE',
|
||||||
|
pt.data("fake-node_0000:71:00.0").traits)
|
||||||
|
self.assertIn('HW_PCI_ONE_TIME_USE',
|
||||||
|
pt.data("fake-node_0000:72:00.0").traits)
|
||||||
|
# These are not, so make sure we do not
|
||||||
|
self.assertNotIn('HW_PCI_ONE_TIME_USE',
|
||||||
|
pt.data("fake-node_0000:73:00.0").traits)
|
||||||
|
self.assertNotIn('HW_PCI_ONE_TIME_USE',
|
||||||
|
pt.data("fake-node_0000:74:00.0").traits)
|
||||||
|
|
||||||
|
def test_otu_reservation_workflow(self):
|
||||||
|
pv = ppt.PlacementView(
|
||||||
|
"fake-node", instances_under_same_host_resize=[])
|
||||||
|
sd = pci_device.PciDevice(
|
||||||
|
address="0000:71:00.0",
|
||||||
|
parent_addr="0000:71:00.0",
|
||||||
|
dev_type=fields.PciDeviceType.STANDARD,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
pf = pci_device.PciDevice(
|
||||||
|
address="0000:72:00.0",
|
||||||
|
parent_addr=None,
|
||||||
|
dev_type=fields.PciDeviceType.SRIOV_PF,
|
||||||
|
vendor_id="dead",
|
||||||
|
product_id="beef",
|
||||||
|
)
|
||||||
|
|
||||||
|
pt = provider_tree.ProviderTree()
|
||||||
|
pt.new_root("fake-node", uuids.compute_rp)
|
||||||
|
|
||||||
|
pv._add_dev(sd, {'one_time_use': 'true'})
|
||||||
|
pv._add_dev(pf, {'one_time_use': 'true'})
|
||||||
|
|
||||||
|
def assert_inventory(addr, reserved):
|
||||||
|
self.assertEqual(
|
||||||
|
reserved,
|
||||||
|
pt.data("fake-node_0000:%i:00.0" % addr
|
||||||
|
).inventory['CUSTOM_PCI_DEAD_BEEF'].get('reserved', 0))
|
||||||
|
|
||||||
|
# Before allocation, reserved is unset
|
||||||
|
pv.update_provider_tree(pt)
|
||||||
|
assert_inventory(71, 0)
|
||||||
|
assert_inventory(72, 0)
|
||||||
|
|
||||||
|
# After allocation, reserved gets set to total (only for the device
|
||||||
|
# that is used)
|
||||||
|
pf.instance_uuid = uuids.instance
|
||||||
|
pv.update_provider_tree(pt)
|
||||||
|
assert_inventory(71, 0)
|
||||||
|
assert_inventory(72, 1)
|
||||||
|
|
||||||
|
# After deallocation, reserved is again unchanged (i.e. never
|
||||||
|
# decremented)
|
||||||
|
pf.instance_uuid = None
|
||||||
|
pv.update_provider_tree(pt)
|
||||||
|
assert_inventory(71, 0)
|
||||||
|
assert_inventory(72, 1)
|
||||||
|
|
||||||
def test_update_provider_tree_for_pci_update_pools(self):
|
def test_update_provider_tree_for_pci_update_pools(self):
|
||||||
pt = provider_tree.ProviderTree()
|
pt = provider_tree.ProviderTree()
|
||||||
pt.new_root("fake-node", uuids.compute_rp)
|
pt.new_root("fake-node", uuids.compute_rp)
|
||||||
|
|||||||
@@ -688,3 +688,21 @@ class PciDevSpecRemoteManagedTestCase(test.NoDBTestCase):
|
|||||||
|
|
||||||
pci_obj = objects.PciDevice.create(None, pci_dev)
|
pci_obj = objects.PciDevice.create(None, pci_dev)
|
||||||
self.assertFalse(pci.match_pci_obj(pci_obj))
|
self.assertFalse(pci.match_pci_obj(pci_obj))
|
||||||
|
|
||||||
|
|
||||||
|
class PciDevSpecOTUTestCase(test.NoDBTestCase):
|
||||||
|
|
||||||
|
@mock.patch('os.path.isdir', return_value=True)
|
||||||
|
def test_missing_config(self, mock_isdir):
|
||||||
|
pci_info = {"vendor_id": "8086", "address": "0000:0a:00.0",
|
||||||
|
"product_id": "5057", "one_time_use": "TrUe"}
|
||||||
|
with mock.patch('builtins.open', side_effect=IOError()):
|
||||||
|
# Without report_in_placement=True, we cannot support OTU
|
||||||
|
self.assertRaisesRegex(exception.PciConfigInvalidSpec,
|
||||||
|
"requires pci.report_in_placement",
|
||||||
|
devspec.PciDeviceSpec, pci_info)
|
||||||
|
# With proper config, we can
|
||||||
|
self.flags(report_in_placement=True, group='pci')
|
||||||
|
dev = devspec.PciDeviceSpec(pci_info)
|
||||||
|
# Make sure we normalized the flag
|
||||||
|
self.assertEqual('true', dev.tags['one_time_use'])
|
||||||
|
|||||||
+1
-1
@@ -51,7 +51,7 @@ psutil>=3.2.2 # BSD
|
|||||||
oslo.versionedobjects>=1.35.0 # Apache-2.0
|
oslo.versionedobjects>=1.35.0 # Apache-2.0
|
||||||
os-brick>=6.10.0 # Apache-2.0
|
os-brick>=6.10.0 # Apache-2.0
|
||||||
os-resource-classes>=1.1.0 # Apache-2.0
|
os-resource-classes>=1.1.0 # Apache-2.0
|
||||||
os-traits>=3.3.0 # Apache-2.0
|
os-traits>=3.4.0 # Apache-2.0
|
||||||
os-vif>=3.1.0 # Apache-2.0
|
os-vif>=3.1.0 # Apache-2.0
|
||||||
castellan>=0.16.0 # Apache-2.0
|
castellan>=0.16.0 # Apache-2.0
|
||||||
microversion-parse>=0.2.1 # Apache-2.0
|
microversion-parse>=0.2.1 # Apache-2.0
|
||||||
|
|||||||
Reference in New Issue
Block a user