From 267a40663cd8d0b94bbc5ebda4ece55a45753b64 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Thu, 28 Jan 2021 16:35:10 +0100 Subject: [PATCH] libvirt: Add a workaround to skip compareCPU() on destination Nova's use of libvirt's compareCPU() API served its purpose over the years, but its design limitations break live migration in subtle ways. For example, the compareCPU() API compares against the host physical CPUID. Some of the features from this CPUID are not exposed by KVM, and then there are some features that KVM emulates that are not in the host CPUID. The latter can cause bogus live migration failures. With QEMU >= 2.9 and libvirt >= 4.4.0, libvirt will do the right thing in terms of CPU compatibility checks on the destination host during live migration. Nova satisfies these minimum version requirements by a good margin. So, provide a workaround to skip the CPU comparison check on the destination host before migrating a guest, and let libvirt handle it correctly. This workaround will be removed once Nova replaces the older libvirt APIs with their newer and improved counterparts[1][2]. - - - Note that Nova's libvirt driver calls compareCPU() in another method, _check_cpu_compatibility(); I did not remove its usage yet, as it needs more careful combing of the code, and then: - where possible, remove the usage of compareCPU() altogether, and rely on libvirt doing the right thing under the hood; or - where Nova _must_ do the CPU comparison checks, switch to the better libvirt CPU APIs -- baselineHypervisorCPU() and compareHypervisorCPU() -- that are described here[1]. This is work in progress[2]. 
[1] https://opendev.org/openstack/nova-specs/commit/70811da221035044e27 [2] https://review.opendev.org/q/topic:bp%252Fcpu-selection-with-hypervisor-consideration Change-Id: I444991584118a969e9ea04d352821b07ec0ba88d Closes-Bug: #1913716 Signed-off-by: Kashyap Chamarthy Signed-off-by: Balazs Gibizer --- nova/conf/workarounds.py | 8 +++++++ nova/tests/unit/virt/libvirt/test_driver.py | 19 +++++++++++++++ nova/virt/libvirt/driver.py | 19 ++++++++------- ...-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml | 24 +++++++++++++++++++ 4 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 7419f073b4..6c52eae8e5 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -401,6 +401,14 @@ VCPU and PCPU resource usage counts should set this option to True. Related options: * :oslo.config:option:`quota.driver` +"""), + cfg.BoolOpt('skip_cpu_compare_on_dest', + default=False, + help=""" +With the libvirt driver, during live migration, skip comparing guest CPU +with the destination host. When using QEMU >= 2.9 and libvirt >= +4.4.0, libvirt will do the correct thing with respect to checking CPU +compatibility on the destination host during live migration. 
"""), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index fd3d322b19..5632fcba86 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -10915,6 +10915,25 @@ class LibvirtConnTestCase(test.NoDBTestCase, 'serial_listen_addr': None}, result.obj_to_primitive()['nova_object.data']) + @mock.patch( + 'nova.network.neutron.API.has_port_binding_extension', + new=mock.Mock(return_value=False)) + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_create_shared_storage_test_file', + return_value='fake') + @mock.patch.object(libvirt_driver.LibvirtDriver, '_compare_cpu') + def test_check_can_live_migrate_guest_cpu_none_model_skip_compare( + self, mock_cpu, mock_test_file): + self.flags(group='workarounds', skip_cpu_compare_on_dest=True) + instance_ref = objects.Instance(**self.test_instance) + instance_ref.vcpu_model = test_vcpu_model.fake_vcpumodel + instance_ref.vcpu_model.model = None + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + compute_info = {'cpu_info': 'asdf', 'disk_available_least': 1} + drvr.check_can_live_migrate_destination( + self.context, instance_ref, compute_info, compute_info) + mock_cpu.assert_not_called() + @mock.patch( 'nova.network.neutron.API.has_port_binding_extension', new=mock.Mock(return_value=False)) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 28e1bf49ac..cc8b7098ab 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -9330,15 +9330,16 @@ class LibvirtDriver(driver.ComputeDriver): disk_available_mb = ( (disk_available_gb * units.Ki) - CONF.reserved_host_disk_mb) - # Compare CPU - try: - if not instance.vcpu_model or not instance.vcpu_model.model: - source_cpu_info = src_compute_info['cpu_info'] - self._compare_cpu(None, source_cpu_info, instance) - else: - self._compare_cpu(instance.vcpu_model, None, instance) - except exception.InvalidCPUInfo as e: - raise 
exception.MigrationPreCheckError(reason=e) + if not CONF.workarounds.skip_cpu_compare_on_dest: + # Compare CPU + try: + if not instance.vcpu_model or not instance.vcpu_model.model: + source_cpu_info = src_compute_info['cpu_info'] + self._compare_cpu(None, source_cpu_info, instance) + else: + self._compare_cpu(instance.vcpu_model, None, instance) + except exception.InvalidCPUInfo as e: + raise exception.MigrationPreCheckError(reason=e) # Create file on storage, to be checked on source host filename = self._create_shared_storage_test_file(instance) diff --git a/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml b/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml new file mode 100644 index 0000000000..e7cd4041b1 --- /dev/null +++ b/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml @@ -0,0 +1,24 @@ +--- +issues: + - | + Nova's use of libvirt's compareCPU() API served its purpose over the + years, but its design limitations break live migration in subtle + ways. For example, the compareCPU() API compares against the host + physical CPUID. Some of the features from this CPUID are not + exposed by KVM, and then there are some features that KVM emulates + that are not in the host CPUID. The latter can cause bogus live + migration failures. + + With QEMU >= 2.9 and libvirt >= 4.4.0, libvirt will do the right + thing in terms of CPU compatibility checks on the destination host + during live migration. Nova satisfies these minimum version + requirements by a good margin. So, this workaround provides a way to + skip the CPU comparison check on the destination host before + migrating a guest, and let libvirt handle it correctly. + + This workaround will be deprecated and removed once Nova replaces + the older libvirt APIs with their newer counterparts. The work is + being tracked via this `blueprint + cpu-selection-with-hypervisor-consideration`_. + + .. 
_blueprint cpu-selection-with-hypervisor-consideration: https://blueprints.launchpad.net/nova/+spec/cpu-selection-with-hypervisor-consideration