diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 7b7da4fd56..022128612e 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -9087,8 +9087,20 @@ class ComputeManager(manager.Manager): try: allocs = self.reportclient.get_allocations_for_consumer( ctxt, instance.uuid) - migrate_data = self.compute_rpcapi.check_can_live_migrate_source( - ctxt, instance, dest_check_data) + try: + migrate_data = ( + self.compute_rpcapi.check_can_live_migrate_source( + ctxt, instance, dest_check_data) + ) + except Exception as ex: + msg = _("Error while check_can_live_migrate_source from " + "%(src)s to host %(dest)s: %(ex_type)s %(ex)s") % { + 'src': instance.host, + 'dest': CONF.host, + 'ex_type': type(ex).__name__, + 'ex': ex + } + raise exception.MigrationPreCheckError(msg) if ('src_supports_numa_live_migration' in migrate_data and migrate_data.src_supports_numa_live_migration): migrate_data = self._live_migration_claim( diff --git a/nova/tests/functional/regressions/test_bug_2044235.py b/nova/tests/functional/regressions/test_bug_2044235.py index c81847f58f..71f99c8f67 100644 --- a/nova/tests/functional/regressions/test_bug_2044235.py +++ b/nova/tests/functional/regressions/test_bug_2044235.py @@ -57,5 +57,4 @@ class TestMessagingTimeoutDuringLiveMigrationCheck( self._live_migrate, server, "failed" ) - # bug lp-2044235 - instance is in ERROR but it should not - self._wait_for_state_change(server, "ERROR") + self._wait_for_state_change(server, "ACTIVE") diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index e0c492fa15..fe7d236d5e 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -5233,7 +5233,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, 'nova.network.neutron.API.has_port_binding_extension', lambda *args: True)) self.assertRaises( - test.TestingException, + exception.MigrationPreCheckError, 
self._test_check_can_live_migrate_destination, do_raise=True) @@ -5414,7 +5414,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, 'check_can_live_migrate_source', side_effect=messaging.MessagingTimeout): self.assertRaises( - messaging.MessagingTimeout, + exception.MigrationPreCheckError, self.compute.check_can_live_migrate_destination, self.context, instance, None, None, None, None) _do_test() diff --git a/releasenotes/notes/bug-2044235-reset-instance-after-rpc-issue-during-live-migration.yaml b/releasenotes/notes/bug-2044235-reset-instance-after-rpc-issue-during-live-migration.yaml new file mode 100644 index 0000000000..7f8743afaf --- /dev/null +++ b/releasenotes/notes/bug-2044235-reset-instance-after-rpc-issue-during-live-migration.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixed the issue + `bug 2044235 <https://bugs.launchpad.net/nova/+bug/2044235>`__ where Nova + Conductor puts an instance into an error state if any errors occur during + execution of the 'check_can_live_migrate_source()' method in an RPC call. + Now, any error is caught and a MigrationPreCheckError exception is re-raised + to reset the instance state.