From 7e8a9786dc3d330b79970f398e73fa596dc9445a Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 5 Aug 2020 15:34:32 +0200 Subject: [PATCH] Only unplug vif after the device is detached from libvirt There is a potential race between unplugging a vif and removing the related device from the domain XML. During macvtap (hw_veb) unplug nova tries to reset the MAC and VLAN config of the VF used for the macvtap. However as it is done before the macvtap device is removed from the domain, libvirt still feels responsible to the VF too and restores the configuration. This leads to a situation where the macvtap is removed successfully but the VF behind it left configured with the MAC address and VLAN id of the neutron port. This patch changes the order of the operations. First the device is detached from libvirt and then the vif is unplugged. Part of blueprint sriov-interface-attach-detach Change-Id: Iea126857725502dc2eef6e53894d8755d0e2e7f4 --- nova/tests/unit/virt/libvirt/test_driver.py | 7 +++++-- nova/virt/libvirt/driver.py | 9 ++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index de41339bcc..479d99db7f 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -23480,11 +23480,12 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): mock.patch.object(guest, 'get_interface_by_cfg', side_effect=get_interface_calls), mock.patch.object(domain, 'detachDeviceFlags'), - mock.patch('nova.virt.libvirt.driver.LOG.warning') + mock.patch('nova.virt.libvirt.driver.LOG.warning'), + mock.patch.object(self.drvr.vif_driver, 'unplug') ) as ( mock_get_guest, mock_get_config, mock_get_interface, mock_detach_device_flags, - mock_warning + mock_warning, mock_unplug ): # run the detach method self.drvr.detach_interface(self.context, instance, network_info[0]) @@ -23506,6 +23507,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): expected_cfg.to_xml(), flags=expected_flags) mock_warning.assert_not_called() + mock_unplug.assert_called_once_with(instance, network_info[0]) + def test_detach_interface_with_running_instance(self): self._test_detach_interface( power_state.RUNNING, diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 0cd24aa7d0..c5f59fd482 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -2240,7 +2240,6 @@ class LibvirtDriver(driver.ComputeDriver): CONF.libvirt.virt_type, self._host) interface = guest.get_interface_by_cfg(cfg) try: - self.vif_driver.unplug(instance, vif) # NOTE(mriedem): When deleting an instance and using Neutron, # we can be racing against Neutron deleting the port and # sending the vif-deleted event which then triggers a call to @@ -2320,6 +2319,14 @@ class LibvirtDriver(driver.ComputeDriver): LOG.warning('Detaching interface %(mac)s failed because ' 'the device is no longer found on the guest.', {'mac': mac}, instance=instance) + finally: + # NOTE(gibi): we need to unplug the vif _after_ the detach is done + # on the libvirt side as otherwise libvirt will still manage the + # device that our unplug code trying to reset. This can cause a + # race and leave the detached device configured. Also even if we + # are failed to detach due to race conditions the unplug is + # necessary for the same reason + self.vif_driver.unplug(instance, vif) def _create_snapshot_metadata(self, image_meta, instance, img_fmt, snp_name):