From b7c7d45093c9f6f2e3eac5433f3d614ba1cb9a44 Mon Sep 17 00:00:00 2001 From: Takashi Kajinami Date: Wed, 12 Jun 2024 14:28:19 +0900 Subject: [PATCH] Migrate MEM_ENCRYPTION_CONTEXT from root provider This is preparation work for extending memory encryption support to AMD SEV-ES. It migrates the MEM_ENCRYPTION_CONTEXT resources from the root compute node provider to a separate child resource provider, so that slots for different memory encryption mechanisms can be modeled. Partially-Implements: blueprint amd-sev-es-libvirt-support Change-Id: Iad51c32d0f64ef52513bd2f2b517c91f29c63787 Signed-off-by: Takashi Kajinami --- .../libvirt/test_report_cpu_traits.py | 58 +++- nova/tests/functional/libvirt/test_reshape.py | 99 ++++++ nova/tests/unit/virt/libvirt/test_driver.py | 306 ++++++++++++++++-- nova/virt/libvirt/driver.py | 229 +++++++++++-- 4 files changed, 624 insertions(+), 68 deletions(-) diff --git a/nova/tests/functional/libvirt/test_report_cpu_traits.py b/nova/tests/functional/libvirt/test_report_cpu_traits.py index 99e68b7b5c..0f8761c0ab 100644 --- a/nova/tests/functional/libvirt/test_report_cpu_traits.py +++ b/nova/tests/functional/libvirt/test_report_cpu_traits.py @@ -30,15 +30,13 @@ CONF = conf.CONF class LibvirtReportTraitsTestBase( integrated_helpers.LibvirtProviderUsageBaseTestCase): - pass - def assertMemEncryptionSlotsEqual(self, slots): - inventory = self._get_provider_inventory(self.host_uuid) + def assertMemEncryptionSlotsEqual(self, rp_uuid, slots): + inventory = self._get_provider_inventory(rp_uuid) if slots == 0: self.assertNotIn(orc.MEM_ENCRYPTION_CONTEXT, inventory) else: self.assertEqual( - inventory[orc.MEM_ENCRYPTION_CONTEXT], { 'total': slots, 'min_unit': 1, @@ -46,9 +44,16 @@ class LibvirtReportTraitsTestBase( 'step_size': 1, 'allocation_ratio': 1.0, 'reserved': 0, - } + }, + inventory[orc.MEM_ENCRYPTION_CONTEXT] ) + def _get_amd_sev_rps(self): + root_rp = self._get_resource_provider_by_uuid(self.host_uuid) + rps = self._get_all_rps_in_a_tree(self.host_uuid) + return 
[rp for rp in rps + if rp['name'] == '%s_amd_sev' % root_rp['name']] + class LibvirtReportTraitsTests(LibvirtReportTraitsTestBase): # These must match the capabilities in @@ -143,8 +148,10 @@ class LibvirtReportNoSevTraitsTests(LibvirtReportTraitsTestBase): traits = self._get_provider_traits(self.host_uuid) self.assertNotIn(sev_trait, traits) + self.assertMemEncryptionSlotsEqual(self.host_uuid, 0) - self.assertMemEncryptionSlotsEqual(0) + sev_rps = self._get_amd_sev_rps() + self.assertEqual(0, len(sev_rps)) # Now simulate the host gaining SEV functionality. Here we # simulate a kernel update or reconfiguration which causes the @@ -178,13 +185,21 @@ class LibvirtReportNoSevTraitsTests(LibvirtReportTraitsTestBase): self.compute.driver._static_traits = None self._run_periodics() - traits = self._get_provider_traits(self.host_uuid) - self.assertIn(sev_trait, traits) - # Sanity check that we've still got the trait globally. self.assertIn(sev_trait, self._get_all_traits()) - self.assertMemEncryptionSlotsEqual(db_const.MAX_INT) + # sev capabilities are managed by sub rp and are not present in + # root rp + traits = self._get_provider_traits(self.host_uuid) + self.assertNotIn(sev_trait, traits) + self.assertMemEncryptionSlotsEqual(self.host_uuid, 0) + + sev_rps = self._get_amd_sev_rps() + self.assertEqual(1, len(sev_rps)) + sev_rp_uuid = sev_rps[0]['uuid'] + sev_rp_traits = self._get_provider_traits(sev_rp_uuid) + self.assertIn(sev_trait, sev_rp_traits) + self.assertMemEncryptionSlotsEqual(sev_rp_uuid, db_const.MAX_INT) class LibvirtReportSevTraitsTests(LibvirtReportTraitsTestBase): @@ -221,10 +236,17 @@ class LibvirtReportSevTraitsTests(LibvirtReportTraitsTestBase): global_traits = self._get_all_traits() self.assertIn(sev_trait, global_traits) + # sev capabilities are managed by sub rp and are not present in root rp traits = self._get_provider_traits(self.host_uuid) - self.assertIn(sev_trait, traits) + self.assertNotIn(sev_trait, traits) + 
self.assertMemEncryptionSlotsEqual(self.host_uuid, 0) - self.assertMemEncryptionSlotsEqual(16) + sev_rps = self._get_amd_sev_rps() + self.assertEqual(1, len(sev_rps)) + sev_rp_uuid = sev_rps[0]['uuid'] + sev_rp_traits = self._get_provider_traits(sev_rp_uuid) + self.assertIn(sev_trait, sev_rp_traits) + self.assertMemEncryptionSlotsEqual(sev_rp_uuid, 16) # Now simulate the host losing SEV functionality. Here we # simulate a kernel downgrade or reconfiguration which causes @@ -247,10 +269,14 @@ class LibvirtReportSevTraitsTests(LibvirtReportTraitsTestBase): self.compute.driver._static_traits = None self._run_periodics() - traits = self._get_provider_traits(self.host_uuid) - self.assertNotIn(sev_trait, traits) - # Sanity check that we've still got the trait globally. self.assertIn(sev_trait, self._get_all_traits()) - self.assertMemEncryptionSlotsEqual(0) + traits = self._get_provider_traits(self.host_uuid) + self.assertNotIn(sev_trait, traits) + + # NOTE(tkajinam): Currently the sev rp is not deleted after sev + # support is turned off. This follows the existing behavior for + # other resources such as vGPU. + # sev_rps = self._get_amd_sev_rps() + # self.assertEqual(0, len(sev_rps)) diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index 1f924739e3..f978f6a2c3 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -11,11 +11,13 @@ # License for the specific language governing permissions and limitations # under the License. 
+import copy import io from unittest import mock from oslo_config import cfg from oslo_log import log as logging +from oslo_utils.fixture import uuidsentinel from nova import context from nova import objects @@ -236,3 +238,100 @@ class VGPUReshapeTests(base.ServersTestBase): self.assertEqual( {'VGPU': 1}, allocations[gpu_rp_uuid]['resources']) + + +class SevResphapeTests(base.ServersTestBase): + + def setUp(self): + super().setUp() + admin_context = context.get_admin_context() + hw_mem_enc_image = copy.deepcopy(self.glance.image1) + hw_mem_enc_image['id'] = uuidsentinel.mem_enc_image_id + hw_mem_enc_image['properties']['hw_machine_type'] = 'q35' + hw_mem_enc_image['properties']['hw_firmware_type'] = 'uefi' + hw_mem_enc_image['properties']['hw_mem_encryption'] = True + self.glance.create(admin_context, hw_mem_enc_image) + + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._guest_configure_sev') + def test_create_servers_with_amd_sev(self, mock_configure_sev): + self.hostname = self.start_compute( + hostname='compute1', + ) + self.compute = self.computes[self.hostname] + self.flags(num_memory_encrypted_guests=16, group='libvirt') + + # create the MEM_ENCRYPTION_CONTEXT resource in placement manually, + # to simulate the old layout. + compute_rp_uuid = self.placement.get( + '/resource_providers?name=compute1').body[ + 'resource_providers'][0]['uuid'] + inventories = self.placement.get( + '/resource_providers/%s/inventories' % compute_rp_uuid).body + inventories['inventories']['MEM_ENCRYPTION_CONTEXT'] = { + 'allocation_ratio': 1.0, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'step_size': 1, + 'total': 16} + self.placement.put( + '/resource_providers/%s/inventories' % compute_rp_uuid, + inventories) + + # create a server before reshape + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' 
+ 'update_provider_tree'): + pre_server = self._create_server( + image_uuid=uuidsentinel.mem_enc_image_id) + self.addCleanup(self._delete_server, pre_server) + + # verify that the inventory, usages and allocation are correct before + # the reshape + compute_inventory = self.placement.get( + '/resource_providers/%s/inventories' % compute_rp_uuid).body[ + 'inventories'] + self.assertEqual( + 16, compute_inventory['MEM_ENCRYPTION_CONTEXT']['total']) + compute_usages = self.placement.get( + '/resource_providers/%s/usages' % compute_rp_uuid).body[ + 'usages'] + self.assertEqual(1, compute_usages['MEM_ENCRYPTION_CONTEXT']) + + # restart the compute service to trigger reshape + with mock.patch('nova.virt.libvirt.host.Host.supports_amd_sev', + return_value=True): + self.compute = self.restart_compute_service(self.hostname) + + # verify that the inventory, usages and allocation are correct after + # the reshape + compute_inventory = self.placement.get( + '/resource_providers/%s/inventories' % compute_rp_uuid).body[ + 'inventories'] + self.assertNotIn('MEM_ENCRYPTION_CONTEXT', compute_inventory) + compute_usages = self.placement.get( + '/resource_providers/%s/usages' % compute_rp_uuid).body[ + 'usages'] + self.assertNotIn('MEM_ENCRYPTION_CONTEXT', compute_usages) + + sev_rp_uuid = self.placement.get( + '/resource_providers?name=compute1_amd_sev').body[ + 'resource_providers'][0]['uuid'] + sev_inventory = self.placement.get( + '/resource_providers/%s/inventories' % sev_rp_uuid).body[ + 'inventories'] + self.assertEqual( + 16, sev_inventory['MEM_ENCRYPTION_CONTEXT']['total']) + sev_usages = self.placement.get( + '/resource_providers/%s/usages' % sev_rp_uuid).body[ + 'usages'] + self.assertEqual(1, sev_usages['MEM_ENCRYPTION_CONTEXT']) + + # create a new server after reshape + post_server = self._create_server( + image_uuid=uuidsentinel.mem_enc_image_id) + self.addCleanup(self._delete_server, post_server) + + compute_usages = self.placement.get( + 
'/resource_providers/%s/usages' % sev_rp_uuid).body[ + 'usages'] + self.assertEqual(2, compute_usages['MEM_ENCRYPTION_CONTEXT']) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 5b84cba6db..331037d08a 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -23063,6 +23063,9 @@ class TestUpdateProviderTree(test.NoDBTestCase): mock_gpu_invs.return_value = gpu_inventory_dicts # Use an empty list for vpmems. self.driver._vpmems_by_rc = {'CUSTOM_PMEM_NAMESPACE_4GB': []} + # Use total=0 for MEM_ENCRYPTION_CONTEXT + self.driver._host._supports_amd_sev = True + self.driver._host._max_sev_guests = 0 # Before we update_provider_tree, we have 2 providers from setUp(): # self.cn_rp and self.shared_rp and they are both empty {}. self.assertEqual(2, len(self.pt.get_provider_uuids())) @@ -23184,6 +23187,35 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.assertEqual(expected_resources, self.pt.data(self.cn_rp['uuid']).resources) + def test_update_provider_tree_with_memory_encryption(self): + self.driver._host._supports_amd_sev = True + self.driver._host._max_sev_guests = 16 + self._test_update_provider_tree() + inventory = self._get_inventory() + # root compute node provider inventory is unchanged + self.assertEqual(inventory, + (self.pt.data(self.cn_rp['uuid'])).inventory) + # We should have new sev child providers in the tree under the + # compute node root provider. 
+ compute_node_tree_uuids = self.pt.get_provider_uuids( + self.cn_rp['name']) + self.assertEqual(2, len(compute_node_tree_uuids)) + sev_rp_uuid = compute_node_tree_uuids[1] + sev_provider_data = self.pt.data(sev_rp_uuid) + self.assertEqual('%s_amd_sev' % self.cn_rp['name'], + sev_provider_data.name) + self.assertEqual({ + orc.MEM_ENCRYPTION_CONTEXT: { + 'total': 16, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0 + } + }, sev_provider_data.inventory) + self.assertEqual({ot.HW_CPU_X86_AMD_SEV}, sev_provider_data.traits) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', new=mock.Mock(return_value={'total': disk_gb})) @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', @@ -23484,6 +23516,170 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.assertIn('Unexpected VGPU resource allocation on provider %s' % uuids.other_rp, str(ex)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' + '_get_cpu_feature_traits', + new=mock.Mock(return_value=cpu_traits)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', + new=mock.Mock(return_value={'total': disk_gb})) + @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_available', + new=mock.Mock(return_value=range(pcpus))) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available', + new=mock.Mock(return_value=range(vcpus))) + def test_update_provider_tree_for_memory_encryption_reshape(self): + self.driver._host._supports_amd_sev = True + self.driver._host._max_sev_guests = 16 + # First create a provider tree with MEM_ENCRYPTION_CONTEXT inventory on + # the root node provider. 
+ inventory = self._get_inventory() + sev_inventory = { + orc.MEM_ENCRYPTION_CONTEXT: { + 'total': 16, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0 + } + } + inventory.update(sev_inventory) + self.pt.update_inventory(self.cn_rp['uuid'], inventory) + # Call update_provider_tree which will raise ReshapeNeeded because + # there is MEM_ENCRYPTION_CONTEXT on the root node provider + self.assertRaises(exception.ReshapeNeeded, + self.driver.update_provider_tree, + self.pt, self.cn_rp['name']) + # Now make up some fake allocations to pass back to the upt method + # for the reshape + allocations = { + uuids.consumer1: { + 'allocations': { + # This consumer has MEM_ENCRYPTION_CONTEXT allocations on + # the root node provider and *should* be changed. + self.cn_rp['uuid']: { + 'resources': { + orc.MEMORY_MB: 512, + orc.VCPU: 2, + orc.MEM_ENCRYPTION_CONTEXT: 1 + } + } + } + }, + uuids.consumer2: { + 'allocations': { + # This consumer has no MEM_ENCRYPTION_CONTEXT allocations + # on the root provider *should not* be changed. + self.cn_rp['uuid']: { + 'resources': { + orc.MEMORY_MB: 512, + orc.VCPU: 2 + } + } + } + } + } + original_allocations = copy.deepcopy(allocations) + # Initiate the reshape + self.driver.update_provider_tree( + self.pt, self.cn_rp['name'], allocations=allocations) + # We should have one SEV child provider in the tree under the compute + # node root provider. 
+ compute_node_tree_uuids = self.pt.get_provider_uuids( + self.cn_rp['name']) + self.assertEqual(2, len(compute_node_tree_uuids)) + # The SEV provider should be the 2nd UUID in the list + sev_rp_uuid = compute_node_tree_uuids[1] + # The MEM_ENCRYPTION_CONTEXT inventory should be on the SEV child + # provider + sev_provider_data = self.pt.data(sev_rp_uuid) + self.assertEqual('%s_amd_sev' % self.cn_rp['name'], + sev_provider_data.name) + self.assertEqual({ + orc.MEM_ENCRYPTION_CONTEXT: { + 'total': 16, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0 + } + }, sev_provider_data.inventory) + # Make sure the child provider has the SEV trait + self.assertEqual({ot.HW_CPU_X86_AMD_SEV}, sev_provider_data.traits) + + # The compute node root provider should not have MEM_ENCRYPTION_CONTEXT + # inventory. + del inventory[orc.MEM_ENCRYPTION_CONTEXT] + self.assertEqual(inventory, self.pt.data(self.cn_rp['uuid']).inventory) + # consumer1 should now have allocations against two providers, + # MEMORY_MB on the root compute node provider and + # MEM_ENCRYPTION_CONTEXT on the child provider. + consumer1_allocs = allocations[uuids.consumer1]['allocations'] + self.assertEqual(2, len(consumer1_allocs)) + self.assertEqual({orc.MEMORY_MB: 512, orc.VCPU: 2}, + consumer1_allocs[self.cn_rp['uuid']]['resources']) + # Make sure the MEM_ENCRYPTION_CONTEXT allocation moved to + # the corresponding child RP + self.assertEqual({orc.MEM_ENCRYPTION_CONTEXT: 1}, + consumer1_allocs[sev_rp_uuid]['resources']) + # The allocations on consumer2 should be unchanged. + self.assertEqual(original_allocations[uuids.consumer2], + allocations[uuids.consumer2]) + + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' 
+ '_get_cpu_feature_traits', + new=mock.Mock(return_value=cpu_traits)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', + new=mock.Mock(return_value={'total': disk_gb})) + @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_available', + new=mock.Mock(return_value=range(pcpus))) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available', + new=mock.Mock(return_value=range(vcpus))) + def test_update_provider_tree_for_memory_encryption_reshape_fails(self): + self.driver._host._supports_amd_sev = True + self.driver._host._max_sev_guests = 16 + # First create a provider tree with MEM_ENCRYPTION_CONTEXT inventory on + # the root node provider. + inventory = self._get_inventory() + sev_inventory = { + orc.MEM_ENCRYPTION_CONTEXT: { + 'total': 16, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0 + } + } + inventory.update(sev_inventory) + self.pt.update_inventory(self.cn_rp['uuid'], inventory) + # Now make up some fake allocations to pass back to the upt method + # for the reshape + allocations = { + uuids.consumer1: { + 'allocations': { + # This consumer has invalid MEM_ENCRYPTION_CONTEXT on + # a non-root compute node provider. + uuids.other_rp: { + 'resources': { + orc.MEMORY_MB: 512, + orc.MEM_ENCRYPTION_CONTEXT: 1 + } + } + } + } + } + # Initiate the reshape. 
+ ex = self.assertRaises(exception.ReshapeFailed, + self.driver.update_provider_tree, + self.pt, self.cn_rp['name'], + allocations=allocations) + self.assertIn('Unexpected MEM_ENCRYPTION_CONTEXT resource allocation ' + 'on provider %s' % uuids.other_rp, str(ex)) + @mock.patch('nova.objects.instance.Instance.get_by_uuid') @mock.patch('nova.objects.migration.MigrationList' '.get_in_progress_by_host_and_node') @@ -28733,15 +28929,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): trait = f'COMPUTE_GRAPHICS_MODEL_{model.upper()}' self.assertIn(trait, model_traits) - @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_cpu_feature_traits', - new=mock.Mock(return_value={})) - def test_cpu_traits__sev_support(self): - for support in (False, True): - self.drvr._host._supports_amd_sev = support - traits = self.drvr._get_cpu_traits() - self.assertIn(ot.HW_CPU_X86_AMD_SEV, traits) - self.assertEqual(support, traits[ot.HW_CPU_X86_AMD_SEV]) - @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_cpu_feature_traits', new=mock.Mock(return_value={})) def test_cpu_traits__hyperthreading_support(self): @@ -30916,25 +31103,25 @@ class TestLibvirtSEV(test.NoDBTestCase): @mock.patch.object(os.path, 'exists', new=mock.Mock(return_value=False)) class TestLibvirtSEVUnsupported(TestLibvirtSEV): - def test_get_mem_encrypted_slots_no_config(self): - self.assertEqual(0, self.driver._get_memory_encrypted_slots()) + def test_get_memory_encryption_inventories_no_config(self): + self.assertEqual({}, self.driver._get_memory_encryption_inventories()) - def test_get_mem_encrypted_slots_config_zero(self): + def test_get_memory_encryption_inventories_config_zero(self): self.flags(num_memory_encrypted_guests=0, group='libvirt') - self.assertEqual(0, self.driver._get_memory_encrypted_slots()) + self.assertEqual({}, self.driver._get_memory_encryption_inventories()) @mock.patch.object(libvirt_driver.LOG, 'warning') - def test_get_mem_encrypted_slots_config_non_zero_unsupported( 
+ def test_get_memory_encryption_inventories_config_non_zero_unsupported( self, mock_log): self.flags(num_memory_encrypted_guests=16, group='libvirt') # Still zero without mocked SEV support - self.assertEqual(0, self.driver._get_memory_encrypted_slots()) + self.assertEqual({}, self.driver._get_memory_encryption_inventories()) mock_log.assert_called_with( 'Host is configured with libvirt.num_memory_encrypted_guests ' 'set to %d, but is not SEV-capable.', 16) - def test_get_mem_encrypted_slots_unsupported(self): - self.assertEqual(0, self.driver._get_memory_encrypted_slots()) + def test_get_memory_encryption_inventories_unsupported(self): + self.assertEqual({}, self.driver._get_memory_encryption_inventories()) @mock.patch.object(vc, '_domain_capability_features', @@ -30943,21 +31130,50 @@ class TestLibvirtSEVSupportedNoMaxGuests(TestLibvirtSEV): """Libvirt driver tests for when AMD SEV support is present.""" @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") - def test_get_mem_encrypted_slots_unlimited(self): - self.assertEqual(db_const.MAX_INT, - self.driver._get_memory_encrypted_slots()) + def test_get_memory_encryption_inventories_unlimited(self): + self.assertEqual({ + 'amd_sev': { + 'total': db_const.MAX_INT, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + } + }, self.driver._get_memory_encryption_inventories()) @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") - def test_get_mem_encrypted_slots_config_non_zero_supported(self): + def test_get_memory_encryption_inventories_config_non_zero_supported(self): self.flags(num_memory_encrypted_guests=16, group='libvirt') - self.assertEqual(16, self.driver._get_memory_encrypted_slots()) + self.assertEqual({ + 'amd_sev': { + 'total': 16, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': 
[ot.HW_CPU_X86_AMD_SEV] + } + }, self.driver._get_memory_encryption_inventories()) @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") - def test_get_mem_encrypted_slots_config_zero_supported(self): + def test_get_memory_encryption_inventories_config_zero_supported(self): self.flags(num_memory_encrypted_guests=0, group='libvirt') - self.assertEqual(0, self.driver._get_memory_encrypted_slots()) + self.assertEqual({ + 'amd_sev': { + 'total': 0, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + }, + }, self.driver._get_memory_encryption_inventories()) @mock.patch.object(vc, '_domain_capability_features', @@ -30967,16 +31183,36 @@ class TestLibvirtSEVSupportedMaxGuests(TestLibvirtSEV): @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") @mock.patch.object(libvirt_driver.LOG, 'warning') - def test_get_mem_encrypted_slots_no_override(self, mock_log): - self.assertEqual(100, self.driver._get_memory_encrypted_slots()) + def test_get_memory_encryption_inventories_no_override(self, mock_log): + self.assertEqual({ + 'amd_sev': { + 'total': 100, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + }, + }, self.driver._get_memory_encryption_inventories()) mock_log.assert_not_called() @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") @mock.patch.object(libvirt_driver.LOG, 'warning') - def test_get_mem_encrypted_slots_overlide_more(self, mock_log): + def test_get_memory_encryption_inventories_override_more(self, mock_log): self.flags(num_memory_encrypted_guests=120, group='libvirt') - self.assertEqual(100, self.driver._get_memory_encrypted_slots()) + self.assertEqual({ + 'amd_sev': { + 'total': 100, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 
1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + } + }, self.driver._get_memory_encryption_inventories()) mock_log.assert_called_with( 'Host is configured with libvirt.num_memory_encrypted_guests ' 'set to %d, but supports only %d.', 120, 100) @@ -30984,9 +31220,19 @@ class TestLibvirtSEVSupportedMaxGuests(TestLibvirtSEV): @test.patch_exists(SEV_KERNEL_PARAM_FILE, True) @test.patch_open(SEV_KERNEL_PARAM_FILE, "1\n") @mock.patch.object(libvirt_driver.LOG, 'warning') - def test_get_mem_encrypted_slots_override_less(self, mock_log): + def test_get_memory_encryption_inventories_override_less(self, mock_log): self.flags(num_memory_encrypted_guests=80, group='libvirt') - self.assertEqual(80, self.driver._get_memory_encrypted_slots()) + self.assertEqual({ + 'amd_sev': { + 'total': 80, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + } + }, self.driver._get_memory_encryption_inventories()) mock_log.assert_not_called() diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 75d113261e..f71458fc30 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -9498,7 +9498,6 @@ class LibvirtDriver(driver.ComputeDriver): memory_mb = int(self._host.get_memory_mb_total()) vcpus = len(self._get_vcpu_available()) pcpus = len(self._get_pcpu_available()) - memory_enc_slots = self._get_memory_encrypted_slots() # NOTE(yikun): If the inv record does not exists, the allocation_ratio # will use the CONF.xxx_allocation_ratio value if xxx_allocation_ratio @@ -9543,16 +9542,6 @@ class LibvirtDriver(driver.ComputeDriver): 'reserved': 0, } - if memory_enc_slots: - result[orc.MEM_ENCRYPTION_CONTEXT] = { - 'total': memory_enc_slots, - 'min_unit': 1, - 'max_unit': 1, - 'step_size': 1, - 'allocation_ratio': 1.0, - 'reserved': 0, - } - # If a sharing DISK_GB provider exists in the provider tree, then our # storage is shared, and we should not report the DISK_GB inventory in # the 
compute node provider. @@ -9584,6 +9573,9 @@ class LibvirtDriver(driver.ComputeDriver): self._update_provider_tree_for_vpmems( provider_tree, nodename, result, resources) + self._update_provider_tree_for_memory_encryption( + provider_tree, nodename, allocations=allocations) + provider_tree.update_inventory(nodename, result) provider_tree.update_resources(nodename, resources) @@ -9625,7 +9617,189 @@ class LibvirtDriver(driver.ComputeDriver): metadata=vpmem) resources[rc].add(resource_obj) - def _get_memory_encrypted_slots(self): + def _update_provider_tree_for_memory_encryption(self, provider_tree, + nodename, allocations): + """Updates the provider tree for MEM_ENCRYPTION_CONTEXT inventory. + + Before 2025.2, MEM_ENCRYPTION_CONTEXT inventory and allocations were on + the root compute node provider in the tree. Starting in 2025.2, + the MEM_ENCRYPTION_CONTEXT inventory is on a child provider in + the tree. As a result, this method will "reshape" the tree if necessary + on first start of this compute service in 2025.2. + + :param provider_tree: The ProviderTree to update. + :param nodename: The ComputeNode.hypervisor_hostname, also known as + the name of the root node provider in the tree for this host. + :param allocations: If not None, indicates a reshape was requested and + should be performed. + :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and + the method determines a reshape of the tree is needed, i.e. + MEM_ENCRYPTION_CONTEXT inventory and allocations must be migrated + from the root node provider to a child provider of + MEM_ENCRYPTION_CONTEXT resources in the tree. + :raises: nova.exception.ReshapeFailed if the requested tree reshape + fails for whatever reason. 
+ """ + inventories_dict = self._get_memory_encryption_inventories() + if not inventories_dict: + return + + me_rps = self._ensure_memory_encryption_providers( + inventories_dict, provider_tree, nodename) + + if self._is_reshape_needed_memory_encryption_on_root(provider_tree, + nodename): + if allocations is None: + LOG.info('Requesting provider tree reshape in order to move ' + 'memory encryption context inventory from the root ' + 'compute node provider %s to a child provider.', + nodename) + raise exception.ReshapeNeeded() + root_node = provider_tree.data(nodename) + self._reshape_memory_encryption_resources(allocations, root_node, + me_rps) + if provider_tree.has_traits(nodename, [ot.HW_CPU_X86_AMD_SEV]): + provider_tree.remove_traits(nodename, ot.HW_CPU_X86_AMD_SEV) + if orc.MEM_ENCRYPTION_CONTEXT in root_node.inventory: + del root_node.inventory[orc.MEM_ENCRYPTION_CONTEXT] + provider_tree.update_inventory(nodename, root_node.inventory) + + @staticmethod + def _is_reshape_needed_memory_encryption_on_root(provider_tree, nodename): + """Determine if root RP has MEM_ENCRYPTION_CONTEXT inventories. + + Check to see if the root compute node provider in the tree for + this host already has MEM_ENCRYPTION_CONTEXT inventory because if it + does, we either need to signal for a reshape (if + _update_provider_tree_for_memory_encryption () has no allocations) or + move the allocations within the ProviderTree if passed. + + :param provider_tree: The ProviderTree object for this host. + :param nodename: The ComputeNode.hypervisor_hostname, also known as + the name of the root node provider in the tree for this host. + :returns: boolean, whether we have MEM_ENCRYPTION_CONTEXT root + inventory. 
+ """ + root_node = provider_tree.data(nodename) + return orc.MEM_ENCRYPTION_CONTEXT in root_node.inventory + + def _ensure_memory_encryption_providers(self, inventories_dict, + provider_tree, nodename): + """Ensures MEM_ENCRYPTION_CONTEXT inventory providers exist in the tree + for $nodename. + + MEM_ENCRYPTION_CONTEXT providers are named $nodename_$model, e.g. + ``somehost.foo.bar.com_amd_sev``. + + :param inventories_dict: Dictionary of inventories for + MEM_ENCRYPTION_CONTEXT class + directly provided by _get_memory_encryption_inventories() and which + looks like: + {'amd_sev': + {'total': $TOTAL, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV], + } + } + :param provider_tree: The ProviderTree to update. + :param nodename: The ComputeNode.hypervisor_hostname, also known as + the name of the root node provider in the tree for this host. + :returns: dict, keyed by memory encryption model, to ProviderData + object representing that resource provider in the tree + """ + me_rps = {} + for me_id, inventory in inventories_dict.items(): + me_rp_name = '%s_%s' % (nodename, me_id) + if not inventory['total']: + if provider_tree.exists(me_rp_name): + provider_tree.remove(me_rp_name) + break + if not provider_tree.exists(me_rp_name): + provider_tree.new_child(me_rp_name, nodename) + me_rp = provider_tree.data(me_rp_name) + me_rps[me_id] = me_rp + me_traits = inventory.pop('traits', []) + me_inventory = {orc.MEM_ENCRYPTION_CONTEXT: inventory} + provider_tree.update_inventory(me_rp_name, me_inventory) + provider_tree.add_traits(me_rp_name, *me_traits) + return me_rps + + def _reshape_memory_encryption_resources( + self, allocations, root_node, me_rps): + for consumer_uuid, alloc_data in allocations.items(): + allocs = alloc_data['allocations'] + for rp_uuid in list(allocs): + resources = allocs[rp_uuid]['resources'] + if orc.MEM_ENCRYPTION_CONTEXT in resources: + 
self._reshape_memory_encryption_allocations( + rp_uuid, root_node, consumer_uuid, alloc_data, + resources, me_rps) + + def _reshape_memory_encryption_allocations( + self, rp_uuid, root_node, consumer_uuid, alloc_data, resources, + me_rps): + """Update existing MEM_ENCRYPTION_CONTEXT allocations by moving them + from the root node provider to the child provider for AMD SEV + + :param rp_uuid: UUID of the MEM_ENCRYPTION_CONTEXT resource provider + with allocations from consumer_uuid (should be the root node + provider before reshaping occurs) + :param root_node: ProviderData object for the root compute node + resource provider in the provider tree + :param consumer_uuid: UUID of the consumer (instance) with + MEM_ENCRYPTION_CONTEXT allocations against the resource provider + represented by rp_uuid + :param alloc_data: dict of allocation information for consumer_uuid + :param resources: dict, keyed by resource class, of resources allocated + to consumer_uuid from rp_uuid + :param me_rps: dict, keyed by memory encryption model, to ProviderData + object representing that resource provider in the tree + :raises: ReshapeFailed if the reshape fails for whatever reason + """ + self._assert_is_root_provider( + orc.MEM_ENCRYPTION_CONTEXT, rp_uuid, root_node, consumer_uuid, + alloc_data) + + sev_rp = None + for me_rp_name in me_rps: + if ot.HW_CPU_X86_AMD_SEV in me_rps[me_rp_name].traits: + sev_rp = me_rps[me_rp_name] + break + + if sev_rp is None: + msg = (_('MEM_ENCRYPTION_CONTEXT resources in the root provider ' + '%(rp_uuid)s are allocated by %(consumer_uuid)s but ' + 'the child resource provider for AMD SEV is not found.') + % {'rp_uuid': rp_uuid, 'consumer_uuid': consumer_uuid}) + raise exception.ReshapeFailed(error=msg) + + allocs = alloc_data['allocations'] + allocs[sev_rp.uuid] = { + 'resources': { + orc.MEM_ENCRYPTION_CONTEXT: 1 + } + } + del resources[orc.MEM_ENCRYPTION_CONTEXT] + + def _get_memory_encryption_inventories(self): + """Returns the inventories for 
MEM_ENCRYPTION_CONTEXT. + + :returns: dict, keyed by memory encryption model, of dicts like: + {'amd_sev': + {'total': $TOTAL, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + } + } + """ conf_slots = CONF.libvirt.num_memory_encrypted_guests if not self._host.supports_amd_sev: @@ -9633,7 +9807,7 @@ class LibvirtDriver(driver.ComputeDriver): LOG.warning("Host is configured with " "libvirt.num_memory_encrypted_guests set to " "%d, but is not SEV-capable.", conf_slots) - return 0 + return {} slots = db_const.MAX_INT @@ -9648,8 +9822,18 @@ class LibvirtDriver(driver.ComputeDriver): "but supports only %d.", conf_slots, slots) slots = min(slots, conf_slots) - LOG.debug("Available memory encrypted slots: %d", slots) - return slots + LOG.debug("Available memory encrypted slots: AMD SEV=%d", slots) + return { + 'amd_sev': { + 'total': slots, + 'step_size': 1, + 'max_unit': 1, + 'min_unit': 1, + 'allocation_ratio': 1.0, + 'reserved': 0, + 'traits': [ot.HW_CPU_X86_AMD_SEV] + } + } @property def static_traits(self) -> ty.Dict[str, bool]: @@ -9749,12 +9933,13 @@ class LibvirtDriver(driver.ComputeDriver): @staticmethod def _assert_is_root_provider( - rp_uuid, root_node, consumer_uuid, alloc_data): + rc_name, rp_uuid, root_node, consumer_uuid, alloc_data): """Asserts during a reshape that rp_uuid is for the root node provider. When reshaping, inventory and allocations should be on the root node provider and then moved to child providers. + :param rc_name: Resource class name :param rp_uuid: UUID of the provider that holds inventory/allocations. :param root_node: ProviderData object representing the root node in a provider tree. @@ -9766,15 +9951,16 @@ class LibvirtDriver(driver.ComputeDriver): expected. """ if rp_uuid != root_node.uuid: - # Something is wrong - VGPU inventory should + # Something is wrong - the inventory should # only be on the root node provider if we are # reshaping the tree. 
- msg = (_('Unexpected VGPU resource allocation ' + msg = (_('Unexpected %(rc_name)s resource allocation ' 'on provider %(rp_uuid)s for consumer ' '%(consumer_uuid)s: %(alloc_data)s. ' - 'Expected VGPU allocation to be on root ' + 'Expected %(rc_name)s allocation to be on root ' 'compute node provider %(root_uuid)s.') - % {'rp_uuid': rp_uuid, + % {'rc_name': rc_name, + 'rp_uuid': rp_uuid, 'consumer_uuid': consumer_uuid, 'alloc_data': alloc_data, 'root_uuid': root_node.uuid}) @@ -9888,7 +10074,7 @@ class LibvirtDriver(driver.ComputeDriver): # We've found VGPU allocations on a provider. It should be the root # node provider. self._assert_is_root_provider( - rp_uuid, root_node, consumer_uuid, alloc_data) + orc.VGPU, rp_uuid, root_node, consumer_uuid, alloc_data) # Find which physical GPU corresponds to this allocation. mdev_uuids = self._get_assigned_mdevs_for_reshape( @@ -13281,7 +13467,6 @@ class LibvirtDriver(driver.ComputeDriver): :return: A dict of trait names mapped to boolean values. """ traits = self._get_cpu_feature_traits() - traits[ot.HW_CPU_X86_AMD_SEV] = self._host.supports_amd_sev traits[ot.HW_CPU_HYPERTHREADING] = self._host.has_hyperthreading traits.update(self._get_cpu_arch_traits()) traits.update(self._get_cpu_emulation_arch_traits())