From c59224d715a21998f40f72cf4e37efdc990e4d7e Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Mon, 6 Dec 2021 16:36:41 +0100 Subject: [PATCH] Reproduce bug 1953359 This patch adds a functional test that reproduces a race between an incoming migration and the update_available_resource periodic task. Change-Id: I4be429c56aaa15ee12f448978c38214e741eae63 Related-Bug: #1953359 --- nova/tests/functional/integrated_helpers.py | 10 ++- .../functional/libvirt/test_numa_servers.py | 81 +++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 8710ecd768..70918bc5f5 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -441,7 +441,15 @@ class InstanceHelperMixin: """ # if forcing the server onto a host, we have to use the admin API if not api: - api = self.api if not az else getattr(self, 'admin_api', self.api) + api = self.api if not az and not host else getattr( + self, 'admin_api', self.api) + + if host and not api.microversion: + api.microversion = '2.74' + # with 2.74 networks param needs to use 'none' instead of None + # if no network is needed + if networks is None: + networks = 'none' body = self._build_server( name, image_uuid, flavor_id, networks, az, host) diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index 3bdf96e93f..2736214e64 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -929,6 +929,87 @@ class NUMAServersTest(NUMAServersTestBase): self._assert_pinned_cpus(src_host, 2) self._assert_pinned_cpus(dst_host, 0) + def test_resize_dedicated_policy_race_on_dest_bug_1953359(self): + + self.flags(cpu_dedicated_set='0-2', cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + + host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, +
cpu_cores=2, cpu_threads=1) + self.start_compute(host_info=host_info, hostname='compute1') + + extra_spec = { + 'hw:cpu_policy': 'dedicated', + } + flavor_id = self._create_flavor(vcpu=1, extra_spec=extra_spec) + expected_usage = {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 1} + + server = self._run_build_test(flavor_id, expected_usage=expected_usage) + + inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) + self.assertEqual(1, len(inst.numa_topology.cells)) + # assert that the pcpu 0 is used on compute1 + self.assertEqual({'0': 0}, inst.numa_topology.cells[0].cpu_pinning_raw) + + # start another compute with the same config + self.start_compute(host_info=host_info, hostname='compute2') + + # boot another instance but now on compute2 so that it occupies the + # pcpu 0 on compute2 + # NOTE(gibi): _run_build_test cannot be used here as it assumes only + # compute1 exists + server2 = self._create_server( + flavor_id=flavor_id, + host='compute2', + ) + inst2 = objects.Instance.get_by_uuid(self.ctxt, server2['id']) + self.assertEqual(1, len(inst2.numa_topology.cells)) + # assert that the pcpu 0 is used + self.assertEqual( + {'0': 0}, inst2.numa_topology.cells[0].cpu_pinning_raw) + + # migrate the first instance from compute1 to compute2 but stop + # migrating at the start of finish_resize. Then start a racing periodic + # update_available_resource. + + def fake_finish_resize(*args, **kwargs): + # start a racing update_available_resource periodic + self._run_periodics() + # we expect that CPU pinning fails on the destination node + # as the resource_tracker will use the source node numa_topology + # and that does not fit the dest node as pcpu 0 in the dest + # is already occupied.
+ + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}'): + with mock.patch( + 'nova.compute.manager.ComputeManager.finish_resize' + ) as mock_finish_resize: + mock_finish_resize.side_effect = fake_finish_resize + post = {'migrate': None} + self.admin_api.post_server_action(server['id'], post) + + log = self.stdlog.logger.output + # The resize_claim correctly calculates that the inst1 should be pinned + # to pcpu id 1 instead of 0 + self.assertIn( + 'Computed NUMA topology CPU pinning: usable pCPUs: [[1]], ' + 'vCPUs mapping: [(0, 1)]', + log, + ) + # But the periodic fails as it tries to apply the source topology on + # the dest. This is bug 1953359. + log = self.stdlog.logger.output + self.assertIn('Error updating resources for node compute2', log) + self.assertIn( + 'nova.exception.CPUPinningInvalid: CPU set to pin [0] must be ' + 'a subset of free CPU set [1]', + log, + ) + class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):