pci: track host NUMA topology in stats

For the upcoming `socket` affinity, PCI stats needs to know the
host's NUMA topology in order to calculate the socket affinity of PCI
devices (based on their 'numa_node'). In this patch, the PCI manager
starts using its compute_node parameter to pass the host's NUMA
topology to PCI stats, and HostState does the same with the topology
it already tracks.

PCI stats needs to track the NUMA topology at the object (not the
class) level, so all the '_filter_*()' class and static methods that,
in a subsequent patch, will either use 'self.numa_topology' directly
or call a method that does, are converted to instance methods. They
were only ever called via self anyway.

Implements: blueprint pci-socket-affinity
Change-Id: I1d270cc3e88a74097eefe3b887106222ae06fe1c
This commit is contained in:
Artom Lifshitz
2021-02-04 13:21:37 -05:00
parent 6c3175d3ee
commit 890b6d54a6
10 changed files with 42 additions and 34 deletions
+7 -1
View File
@@ -63,7 +63,13 @@ class PciDevTracker(object):
self.stale = {} self.stale = {}
self.node_id = compute_node.id self.node_id = compute_node.id
self.dev_filter = whitelist.Whitelist(CONF.pci.passthrough_whitelist) self.dev_filter = whitelist.Whitelist(CONF.pci.passthrough_whitelist)
self.stats = stats.PciDeviceStats(dev_filter=self.dev_filter) numa_topology = compute_node.numa_topology
if numa_topology:
# For legacy reasons, the NUMATopology is stored as a JSON blob.
# Deserialize it into a real object.
numa_topology = objects.NUMATopology.obj_from_db_obj(numa_topology)
self.stats = stats.PciDeviceStats(
numa_topology, dev_filter=self.dev_filter)
self._context = context self._context = context
self.pci_devs = objects.PciDeviceList.get_by_compute_node( self.pci_devs = objects.PciDeviceList.get_by_compute_node(
context, self.node_id) context, self.node_id)
+9 -12
View File
@@ -54,8 +54,9 @@ class PciDeviceStats(object):
pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type'] pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type']
def __init__(self, stats=None, dev_filter=None): def __init__(self, numa_topology, stats=None, dev_filter=None):
super(PciDeviceStats, self).__init__() super(PciDeviceStats, self).__init__()
self.numa_topology = numa_topology
# NOTE(sbauza): Stats are a PCIDevicePoolList object # NOTE(sbauza): Stats are a PCIDevicePoolList object
self.pools = [pci_pool.to_dict() self.pools = [pci_pool.to_dict()
for pci_pool in stats] if stats else [] for pci_pool in stats] if stats else []
@@ -234,8 +235,7 @@ class PciDeviceStats(object):
except exception.PciDeviceNotFound: except exception.PciDeviceNotFound:
return return
@staticmethod def _filter_pools_for_spec(self, pools, request):
def _filter_pools_for_spec(pools, request):
"""Filter out pools that don't match the request's device spec. """Filter out pools that don't match the request's device spec.
Exclude pools that do not match the specified ``vendor_id``, Exclude pools that do not match the specified ``vendor_id``,
@@ -254,8 +254,7 @@ class PciDeviceStats(object):
if utils.pci_device_prop_match(pool, request_specs) if utils.pci_device_prop_match(pool, request_specs)
] ]
@classmethod def _filter_pools_for_numa_cells(self, pools, request, numa_cells):
def _filter_pools_for_numa_cells(cls, pools, request, numa_cells):
"""Filter out pools with the wrong NUMA affinity, if required. """Filter out pools with the wrong NUMA affinity, if required.
Exclude pools that do not have *suitable* PCI NUMA affinity. Exclude pools that do not have *suitable* PCI NUMA affinity.
@@ -324,8 +323,7 @@ class PciDeviceStats(object):
return sorted( return sorted(
pools, key=lambda pool: pool.get('numa_node') not in numa_cell_ids) pools, key=lambda pool: pool.get('numa_node') not in numa_cell_ids)
@classmethod def _filter_pools_for_unrequested_pfs(self, pools, request):
def _filter_pools_for_unrequested_pfs(cls, pools, request):
"""Filter out pools with PFs, unless these are required. """Filter out pools with PFs, unless these are required.
This is necessary in cases where PFs and VFs have the same product_id This is necessary in cases where PFs and VFs have the same product_id
@@ -347,8 +345,7 @@ class PciDeviceStats(object):
] ]
return pools return pools
@classmethod def _filter_pools(self, pools, request, numa_cells):
def _filter_pools(cls, pools, request, numa_cells):
"""Determine if an individual PCI request can be met. """Determine if an individual PCI request can be met.
Filter pools, which are collections of devices with similar traits, to Filter pools, which are collections of devices with similar traits, to
@@ -372,7 +369,7 @@ class PciDeviceStats(object):
# Firstly, let's exclude all devices that don't match our spec (e.g. # Firstly, let's exclude all devices that don't match our spec (e.g.
# they've got different PCI IDs or something) # they've got different PCI IDs or something)
before_count = sum([pool['count'] for pool in pools]) before_count = sum([pool['count'] for pool in pools])
pools = cls._filter_pools_for_spec(pools, request) pools = self._filter_pools_for_spec(pools, request)
after_count = sum([pool['count'] for pool in pools]) after_count = sum([pool['count'] for pool in pools])
if after_count < before_count: if after_count < before_count:
@@ -389,7 +386,7 @@ class PciDeviceStats(object):
# *assuming* we have devices and care about that, as determined by # *assuming* we have devices and care about that, as determined by
# policy # policy
before_count = after_count before_count = after_count
pools = cls._filter_pools_for_numa_cells(pools, request, numa_cells) pools = self._filter_pools_for_numa_cells(pools, request, numa_cells)
after_count = sum([pool['count'] for pool in pools]) after_count = sum([pool['count'] for pool in pools])
if after_count < before_count: if after_count < before_count:
@@ -405,7 +402,7 @@ class PciDeviceStats(object):
# Finally, if we're not requesting PFs then we should not use these. # Finally, if we're not requesting PFs then we should not use these.
# Exclude them. # Exclude them.
before_count = after_count before_count = after_count
pools = cls._filter_pools_for_unrequested_pfs(pools, request) pools = self._filter_pools_for_unrequested_pfs(pools, request)
after_count = sum([pool['count'] for pool in pools]) after_count = sum([pool['count'] for pool in pools])
if after_count < before_count: if after_count < before_count:
+1
View File
@@ -227,6 +227,7 @@ class HostState(object):
self.numa_topology = objects.NUMATopology.obj_from_db_obj( self.numa_topology = objects.NUMATopology.obj_from_db_obj(
compute.numa_topology) if compute.numa_topology else None compute.numa_topology) if compute.numa_topology else None
self.pci_stats = pci_stats.PciDeviceStats( self.pci_stats = pci_stats.PciDeviceStats(
self.numa_topology,
stats=compute.pci_device_pools) stats=compute.pci_device_pools)
# All virt drivers report host_ip # All virt drivers report host_ip
+1 -1
View File
@@ -61,7 +61,7 @@ class DummyTracker(object):
return_value=objects.PciDeviceList() return_value=objects.PciDeviceList()
): ):
self.pci_tracker = pci_manager.PciDevTracker( self.pci_tracker = pci_manager.PciDevTracker(
ctxt, objects.ComputeNode(id=1)) ctxt, objects.ComputeNode(id=1, numa_topology=None))
class ClaimTestCase(test.NoDBTestCase): class ClaimTestCase(test.NoDBTestCase):
+5 -4
View File
@@ -144,7 +144,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
def _create_tracker(self, fake_devs): def _create_tracker(self, fake_devs):
self.fake_devs = fake_devs self.fake_devs = fake_devs
self.tracker = manager.PciDevTracker( self.tracker = manager.PciDevTracker(
self.fake_context, objects.ComputeNode(id=1)) self.fake_context, objects.ComputeNode(id=1, numa_topology=None))
def setUp(self): def setUp(self):
super(PciDevTrackerTestCase, self).setUp() super(PciDevTrackerTestCase, self).setUp()
@@ -222,7 +222,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
fake_pci_devs = [copy.deepcopy(fake_pci_4), copy.deepcopy(fake_pci_5)] fake_pci_devs = [copy.deepcopy(fake_pci_4), copy.deepcopy(fake_pci_5)]
fake_pci_devs_json = jsonutils.dumps(fake_pci_devs) fake_pci_devs_json = jsonutils.dumps(fake_pci_devs)
tracker = manager.PciDevTracker( tracker = manager.PciDevTracker(
self.fake_context, objects.ComputeNode(id=1)) self.fake_context, objects.ComputeNode(id=1, numa_topology=None))
tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json) tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json)
self.assertEqual(5, len(tracker.pci_devs)) self.assertEqual(5, len(tracker.pci_devs))
@@ -251,7 +251,7 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
fake_pci_devs = [fake_pci] fake_pci_devs = [fake_pci]
fake_pci_devs_json = jsonutils.dumps(fake_pci_devs) fake_pci_devs_json = jsonutils.dumps(fake_pci_devs)
tracker = manager.PciDevTracker( tracker = manager.PciDevTracker(
self.fake_context, objects.ComputeNode(id=1)) self.fake_context, objects.ComputeNode(id=1, numa_topology=None))
# We expect that the device with 32bit PCI domain is ignored, so we'll # We expect that the device with 32bit PCI domain is ignored, so we'll
# have only the 3 original fake devs # have only the 3 original fake devs
tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json) tracker.update_devices_from_hypervisor_resources(fake_pci_devs_json)
@@ -426,7 +426,8 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
fake_devs_numa = copy.deepcopy(fake_db_devs) fake_devs_numa = copy.deepcopy(fake_db_devs)
fake_devs_numa.append(fake_db_dev_3) fake_devs_numa.append(fake_db_dev_3)
self.tracker = manager.PciDevTracker( self.tracker = manager.PciDevTracker(
mock.sentinel.context, objects.ComputeNode(id=1)) mock.sentinel.context,
objects.ComputeNode(id=1, numa_topology=None))
self.tracker._set_hvdevs(fake_devs_numa) self.tracker._set_hvdevs(fake_devs_numa)
pci_requests = copy.deepcopy(fake_pci_requests)[:1] pci_requests = copy.deepcopy(fake_pci_requests)[:1]
pci_requests[0]['count'] = 2 pci_requests[0]['count'] = 2
+9 -7
View File
@@ -97,7 +97,7 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
def setUp(self): def setUp(self):
super(PciDeviceStatsTestCase, self).setUp() super(PciDeviceStatsTestCase, self).setUp()
self.pci_stats = stats.PciDeviceStats() self.pci_stats = stats.PciDeviceStats(objects.NUMATopology())
# The following two calls need to be made before adding the devices. # The following two calls need to be made before adding the devices.
patcher = fakes.fake_pci_whitelist() patcher = fakes.fake_pci_whitelist()
self.addCleanup(patcher.stop) self.addCleanup(patcher.stop)
@@ -123,7 +123,7 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.fake_dev_2) self.fake_dev_2)
def test_pci_stats_equivalent(self): def test_pci_stats_equivalent(self):
pci_stats2 = stats.PciDeviceStats() pci_stats2 = stats.PciDeviceStats(objects.NUMATopology())
for dev in [self.fake_dev_1, for dev in [self.fake_dev_1,
self.fake_dev_2, self.fake_dev_2,
self.fake_dev_3, self.fake_dev_3,
@@ -132,7 +132,7 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.assertEqual(self.pci_stats, pci_stats2) self.assertEqual(self.pci_stats, pci_stats2)
def test_pci_stats_not_equivalent(self): def test_pci_stats_not_equivalent(self):
pci_stats2 = stats.PciDeviceStats() pci_stats2 = stats.PciDeviceStats(objects.NUMATopology())
for dev in [self.fake_dev_1, for dev in [self.fake_dev_1,
self.fake_dev_2, self.fake_dev_2,
self.fake_dev_3]: self.fake_dev_3]:
@@ -141,7 +141,7 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
def test_object_create(self): def test_object_create(self):
m = self.pci_stats.to_device_pools_obj() m = self.pci_stats.to_device_pools_obj()
new_stats = stats.PciDeviceStats(m) new_stats = stats.PciDeviceStats(objects.NUMATopology(), m)
self.assertEqual(len(new_stats.pools), 3) self.assertEqual(len(new_stats.pools), 3)
self.assertEqual(set([d['count'] for d in new_stats]), self.assertEqual(set([d['count'] for d in new_stats]),
@@ -426,7 +426,7 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
def test_white_list_parsing(self, mock_whitelist_parse): def test_white_list_parsing(self, mock_whitelist_parse):
white_list = '{"product_id":"0001", "vendor_id":"8086"}' white_list = '{"product_id":"0001", "vendor_id":"8086"}'
CONF.set_override('passthrough_whitelist', white_list, 'pci') CONF.set_override('passthrough_whitelist', white_list, 'pci')
pci_stats = stats.PciDeviceStats() pci_stats = stats.PciDeviceStats(objects.NUMATopology())
pci_stats.add_device(self.fake_dev_2) pci_stats.add_device(self.fake_dev_2)
pci_stats.remove_device(self.fake_dev_2) pci_stats.remove_device(self.fake_dev_2)
self.assertEqual(1, mock_whitelist_parse.call_count) self.assertEqual(1, mock_whitelist_parse.call_count)
@@ -441,7 +441,9 @@ class PciDeviceStatsWithTagsTestCase(test.NoDBTestCase):
'{"vendor_id":"1137","product_id":"0072"}'] '{"vendor_id":"1137","product_id":"0072"}']
self.flags(passthrough_whitelist=white_list, group='pci') self.flags(passthrough_whitelist=white_list, group='pci')
dev_filter = whitelist.Whitelist(white_list) dev_filter = whitelist.Whitelist(white_list)
self.pci_stats = stats.PciDeviceStats(dev_filter=dev_filter) self.pci_stats = stats.PciDeviceStats(
objects.NUMATopology(),
dev_filter=dev_filter)
def _create_pci_devices(self): def _create_pci_devices(self):
self.pci_tagged_devices = [] self.pci_tagged_devices = []
@@ -594,7 +596,7 @@ class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
white_list = ['{"vendor_id":"8086","product_id":"1528"}', white_list = ['{"vendor_id":"8086","product_id":"1528"}',
'{"vendor_id":"8086","product_id":"1515"}'] '{"vendor_id":"8086","product_id":"1515"}']
self.flags(passthrough_whitelist=white_list, group='pci') self.flags(passthrough_whitelist=white_list, group='pci')
self.pci_stats = stats.PciDeviceStats() self.pci_stats = stats.PciDeviceStats(objects.NUMATopology())
def _create_pci_devices(self, vf_product_id=1515, pf_product_id=1528): def _create_pci_devices(self, vf_product_id=1515, pf_product_id=1528):
self.sriov_pf_devices = [] self.sriov_pf_devices = []
@@ -70,7 +70,8 @@ class TestPCIPassthroughFilter(test.NoDBTestCase):
requests = objects.InstancePCIRequests(requests=[request]) requests = objects.InstancePCIRequests(requests=[request])
spec_obj = objects.RequestSpec(pci_requests=requests) spec_obj = objects.RequestSpec(pci_requests=requests)
host = fakes.FakeHostState('host1', 'node1', host = fakes.FakeHostState('host1', 'node1',
attribute_dict={'pci_stats': stats.PciDeviceStats()}) attribute_dict={
'pci_stats': stats.PciDeviceStats(objects.NUMATopology())})
self.assertFalse(self.filt_cls.host_passes(host, spec_obj)) self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
def test_pci_passthrough_with_pci_stats_none(self): def test_pci_passthrough_with_pci_stats_none(self):
@@ -1568,10 +1568,9 @@ class HostStateTestCase(test.NoDBTestCase):
host = host_manager.HostState("fakehost", "fakenode", uuids.cell) host = host_manager.HostState("fakehost", "fakenode", uuids.cell)
self.assertIsNone(host.updated) self.assertIsNone(host.updated)
host.pci_stats = pci_stats.PciDeviceStats( host.pci_stats = pci_stats.PciDeviceStats(
[objects.PciDevicePool(vendor_id='8086', objects.NUMATopology(),
product_id='15ed', [objects.PciDevicePool(vendor_id='8086', product_id='15ed',
numa_node=1, numa_node=1, count=1)])
count=1)])
host.numa_topology = fakes.NUMA_TOPOLOGY host.numa_topology = fakes.NUMA_TOPOLOGY
host.consume_from_request(req_spec) host.consume_from_request(req_spec)
self.assertIsInstance(req_spec.numa_topology, self.assertIsInstance(req_spec.numa_topology,
@@ -1602,7 +1601,7 @@ class HostStateTestCase(test.NoDBTestCase):
self.assertIsNone(host.updated) self.assertIsNone(host.updated)
fake_updated = mock.sentinel.fake_updated fake_updated = mock.sentinel.fake_updated
host.updated = fake_updated host.updated = fake_updated
host.pci_stats = pci_stats.PciDeviceStats() host.pci_stats = pci_stats.PciDeviceStats(objects.NUMATopology())
with mock.patch.object(host.pci_stats, 'apply_requests', with mock.patch.object(host.pci_stats, 'apply_requests',
side_effect=exception.PciDeviceRequestFailed): side_effect=exception.PciDeviceRequestFailed):
host.consume_from_request(req_spec) host.consume_from_request(req_spec)
@@ -37,7 +37,7 @@ def _create_pci_stats(counts):
return None return None
pools = [_create_pci_pool(count) for count in counts] pools = [_create_pci_pool(count) for count in counts]
return stats.PciDeviceStats(pools) return stats.PciDeviceStats(objects.NUMATopology(), pools)
class PCIWeigherTestCase(test.NoDBTestCase): class PCIWeigherTestCase(test.NoDBTestCase):
+3 -2
View File
@@ -2831,7 +2831,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
pci_request = objects.InstancePCIRequest(count=1, pci_request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': '8086'}]) spec=[{'vendor_id': '8086'}])
pci_reqs = [pci_request] pci_reqs = [pci_request]
pci_stats = stats.PciDeviceStats() pci_stats = stats.PciDeviceStats(objects.NUMATopology())
with mock.patch.object(stats.PciDeviceStats, with mock.patch.object(stats.PciDeviceStats,
'support_requests', return_value= True): 'support_requests', return_value= True):
fitted_instance1 = hw.numa_fit_instance_to_host(self.host, fitted_instance1 = hw.numa_fit_instance_to_host(self.host,
@@ -2845,7 +2845,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
pci_request = objects.InstancePCIRequest(count=1, pci_request = objects.InstancePCIRequest(count=1,
spec=[{'vendor_id': '8086'}]) spec=[{'vendor_id': '8086'}])
pci_reqs = [pci_request] pci_reqs = [pci_request]
pci_stats = stats.PciDeviceStats() pci_stats = stats.PciDeviceStats(objects.NUMATopology())
with mock.patch.object(stats.PciDeviceStats, with mock.patch.object(stats.PciDeviceStats,
'support_requests', return_value= False): 'support_requests', return_value= False):
fitted_instance1 = hw.numa_fit_instance_to_host( fitted_instance1 = hw.numa_fit_instance_to_host(
@@ -2861,6 +2861,7 @@ class VirtNUMAHostTopologyTestCase(test.NoDBTestCase):
test_dict = copy.copy(fake_pci.fake_pool_dict) test_dict = copy.copy(fake_pci.fake_pool_dict)
test_dict['numa_node'] = node test_dict['numa_node'] = node
return stats.PciDeviceStats( return stats.PciDeviceStats(
objects.NUMATopology(),
[objects.PciDevicePool.from_dict(test_dict)]) [objects.PciDevicePool.from_dict(test_dict)])
# the PCI device is found on host cell 1 # the PCI device is found on host cell 1