diff --git a/nova/compute/manager.py b/nova/compute/manager.py index bc1a991b6b..237d9d94d3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8552,6 +8552,18 @@ class ComputeManager(manager.Manager): None """ + # NOTE(gibi): We need to refresh pci_requests of the instance as it + # might be changed by the conductor during scheduling based on the + # selected destination host. If the instance has SRIOV ports with + # resource request then the LiveMigrationTask._find_destination call + # updated the instance.pci_requests.requests[].spec with the SRIOV PF + # device name to be used on the destination host. As the migration is + # rolling back to the source host now we don't want to persist the + # destination host related changes in the DB. + instance.pci_requests = \ + objects.InstancePCIRequests.get_by_instance_uuid( + context, instance.uuid) + if (isinstance(migrate_data, migrate_data_obj.LiveMigrateData) and migrate_data.obj_attr_is_set('migration')): migration = migrate_data.migration diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 9993202f94..54d5f63180 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -156,8 +156,7 @@ class ResourceTracker(object): instance=instance) cn = self.compute_nodes[nodename] - pci_requests = objects.InstancePCIRequests.get_by_instance_uuid( - context, instance.uuid) + pci_requests = instance.pci_requests claim = claims.Claim(context, instance, nodename, self, cn, pci_requests, limits=limits) diff --git a/nova/compute/utils.py b/nova/compute/utils.py index c9b3130dea..ea9ca36d01 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -1509,7 +1509,6 @@ def update_pci_request_spec_with_allocated_interface_name( return (pci_request.requester_id and pci_request.requester_id in mapping) - modified = False for pci_request in instance.pci_requests.requests: if needs_update(pci_request, provider_mapping): @@ -1535,6 +1534,3 @@ def update_pci_request_spec_with_allocated_interface_name( for spec in pci_request.spec: spec['parent_ifname'] = rp_name_pieces[2] - modified = True - if modified: - instance.save() diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 2b258fcc83..cf53353195 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -9,7 +9,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. -import copy from oslo_log import log as logging import oslo_messaging as messaging @@ -68,7 +67,6 @@ class LiveMigrationTask(base.TaskBase): self._source_cn = None self._held_allocations = None self.network_api = neutron.API() - self.instance_pci_request = None def _execute(self): self._check_instance_is_active() @@ -84,11 +82,6 @@ class LiveMigrationTask(base.TaskBase): self.instance, self.migration)) - # NOTE(gibi): migrating with qos SRIOV ports will update the PCI - # request so we saving the request here so that we can roll that change - # back if the migration fails. - self.instance_pci_request = copy.deepcopy(self.instance.pci_requests) - if not self.destination: # Either no host was specified in the API request and the user # wants the scheduler to pick a destination host, or a host was @@ -158,9 +151,6 @@ class LiveMigrationTask(base.TaskBase): self._source_cn, self.instance, self.migration) - if self.instance_pci_request: - self.instance.pci_requests = self.instance_pci_request - self.instance.save() def _check_instance_is_active(self): if self.instance.power_state not in (power_state.RUNNING, @@ -521,6 +511,10 @@ class LiveMigrationTask(base.TaskBase): provider_mapping = request_spec.get_request_group_mapping() if provider_mapping: + # NOTE(gibi): this call might update the pci_requests of the + # instance based on the destination host if so then such change + # will be persisted when post_live_migration_at_destination + # runs. compute_utils.\ update_pci_request_spec_with_allocated_interface_name( self.context, self.report_client, self.instance, diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index d0cf4766a5..488cc41894 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -6653,6 +6653,8 @@ class ServerMoveWithPortResourceRequestTest( migration_uuid, source_compute_rp_uuid=self.compute1_rp_uuid, new_flavor=new_flavor) + self._assert_pci_request_pf_device_name(server, 'host2-ens2') + self._confirm_resize(server) # check that allocation is still OK @@ -6789,6 +6791,8 @@ class ServerMoveWithPortResourceRequestTest( source_compute_rp_uuid=self.compute1_rp_uuid, new_flavor=new_flavor) + self._assert_pci_request_pf_device_name(server, 'host3-ens2') + self._confirm_resize(server) # check that allocation is still OK @@ -7141,6 +7145,8 @@ class ServerMoveWithPortResourceRequestTest( self.compute2_rp_uuid, non_qos_normal_port, qos_normal_port, qos_sriov_port) + self._assert_pci_request_pf_device_name(server, 'host2-ens2') + # recover source compute self.admin_api.put_service( self.compute1_service_id, {'forced_down': 'false'}) @@ -7300,6 +7306,8 @@ class ServerMoveWithPortResourceRequestTest( server, self.compute2_rp_uuid, non_qos_normal_port, qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + self._assert_pci_request_pf_device_name(server, 'host2-ens2') + self._delete_server_and_check_allocations( server, qos_normal_port, qos_sriov_port) @@ -7364,6 +7372,8 @@ class ServerMoveWithPortResourceRequestTest( server, compute3_rp_uuid, non_qos_normal_port, qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + self._assert_pci_request_pf_device_name(server, 'host3-ens2') + self._delete_server_and_check_allocations( server, qos_normal_port, qos_sriov_port) @@ -7529,10 +7539,7 @@ class LiveMigrateAbortWithPortResourceRequestTest( qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) # Assert that the InstancePCIRequests rolled back to point to host1 - # This assert is fails now as the abort does not change the PCI device - # back - # TODO(gibi): come up with an idea to fix this - # self._assert_pci_request_pf_device_name(server, 'host1-ens2') + self._assert_pci_request_pf_device_name(server, 'host1-ens2') self._delete_server_and_check_allocations( server, qos_normal_port, qos_sriov_port) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index f2d6394bf5..7e52aea5a5 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -6578,6 +6578,7 @@ class ComputeTestCase(BaseTestCase, bdms = objects.BlockDeviceMappingList() mock_bdms.return_value = bdms + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.compute.utils.notify_about_instance_action') @mock.patch.object(migration, 'save') @mock.patch.object(self.compute, '_revert_allocation') @@ -6586,7 +6587,8 @@ class ComputeTestCase(BaseTestCase, @mock.patch.object(compute_rpcapi.ComputeAPI, 'drop_move_claim_at_destination') def _test(mock_drop_claim, mock_nw_api, mock_lmcf, mock_ra, - mock_mig_save, mock_notify): + mock_mig_save, mock_notify, mock_get_pci): + mock_get_pci.return_value = objects.InstancePCIRequests() mock_lmcf.return_value = False, False if migration_status: self.compute._rollback_live_migration( @@ -6611,6 +6613,8 @@ class ComputeTestCase(BaseTestCase, ]) mock_ra.assert_called_once_with(mock.ANY, instance, migration) mock_mig_save.assert_called_once_with() + mock_get_pci.assert_called_once_with(c, instance.uuid) + self.assertEqual(mock_get_pci.return_value, instance.pci_requests) _test() @@ -6637,6 +6641,7 @@ class ComputeTestCase(BaseTestCase, migrate_data = objects.LibvirtLiveMigrateData(migration=migration) source_bdms = objects.BlockDeviceMappingList() + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch.object(self.compute, '_notify_about_instance_usage') @mock.patch('nova.compute.utils.notify_about_instance_action') @mock.patch.object(instance, 'save') @@ -6647,7 +6652,10 @@ class ComputeTestCase(BaseTestCase, @mock.patch.object(self.compute.network_api, 'setup_networks_on_host', side_effect=(None, test.TestingException)) def _test(mock_nw_setup, _mock_lmcf, mock_ra, mock_mig_save, - mock_inst_save, _mock_notify_action, mock_notify_usage): + mock_inst_save, _mock_notify_action, mock_notify_usage, + mock_get_pci): + mock_get_pci.return_value = objects.InstancePCIRequests() + self.assertRaises(test.TestingException, self.compute._rollback_live_migration, ctxt, instance, 'dest-host', migrate_data, @@ -6672,6 +6680,8 @@ class ComputeTestCase(BaseTestCase, # Since we failed during rollback, the migration status gets set # to 'error' instead of 'goofballs'. self.assertEqual('error', migration.status) + mock_get_pci.assert_called_once_with(ctxt, instance.uuid) + self.assertEqual(mock_get_pci.return_value, instance.pci_requests) _test() diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 14a8991ec9..abe248ff6a 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -9546,6 +9546,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, bdm.connection_info = new_attachment_id bdms = objects.BlockDeviceMappingList(objects=[bdm]) + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch.object(compute.volume_api, 'attachment_delete') @mock.patch.object(compute_utils, 'notify_about_instance_action') @mock.patch.object(instance, 'save') @@ -9557,11 +9558,12 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, @mock.patch.object(objects.Instance, 'drop_migration_context') def _test(mock_drop_mig_ctxt, mock_get_bdms, mock_net_api, mock_remove_conn, mock_usage, mock_instance_save, - mock_action, mock_attach_delete): + mock_action, mock_attach_delete, mock_get_pci): # this tests that _rollback_live_migration replaces the bdm's # attachment_id with the original attachment id that is in # migrate_data. mock_get_bdms.return_value = bdms + mock_get_pci.return_value = objects.InstancePCIRequests() compute._rollback_live_migration(self.context, instance, None, migrate_data=migrate_data, @@ -9575,6 +9577,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, self.assertEqual(orig_attachment_id, bdm.connection_info) bdm.save.assert_called_once_with() mock_drop_mig_ctxt.assert_called_once_with() + mock_get_pci.assert_called_once_with(self.context, instance.uuid) + self.assertEqual(mock_get_pci.return_value, instance.pci_requests) _test() @@ -9591,13 +9595,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, migration=self.migration, is_shared_instance_path=True, is_shared_block_storage=True) + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch.object(self.compute, '_revert_allocation') @mock.patch.object(self.instance, 'save') @mock.patch.object(self.compute, 'network_api') @mock.patch.object(self.compute, '_notify_about_instance_usage') @mock.patch.object(objects.Instance, 'drop_migration_context') def _do_test(drop_mig_ctxt, legacy_notify, network_api, instance_save, - _revert_allocation): + _revert_allocation, mock_get_pci): # setup_networks_on_host is called two times: # 1. set the migrating_to attribute in the port binding profile, # which is a no-op in this case for neutron @@ -9606,6 +9611,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, None, exception.PortBindingDeletionFailed( port_id=uuids.port_id, host='fake-dest-host')] + mock_get_pci.return_value = objects.InstancePCIRequests() self.compute._rollback_live_migration( self.context, self.instance, 'fake-dest-host', migrate_data, source_bdms=objects.BlockDeviceMappingList()) @@ -9613,6 +9619,10 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, self.assertIn('Network cleanup failed for destination host', mock_log_error.call_args[0][0]) drop_mig_ctxt.assert_called_once_with() + mock_get_pci.assert_called_once_with( + self.context, self.instance.uuid) + self.assertEqual( + mock_get_pci.return_value, self.instance.pci_requests) _do_test() diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 78d5772fae..e2d4457cd0 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -1948,13 +1948,11 @@ class TestInstanceClaim(BaseTestCase): self.assertIsInstance(claim, claims.NopClaim) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') - def test_update_usage_with_claim(self, migr_mock, pci_mock, - check_bfv_mock): + def test_update_usage_with_claim(self, migr_mock, check_bfv_mock): # Test that RT.update_usage() only changes the compute node # resources if there has been a claim first. - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False expected = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0]) @@ -1990,12 +1988,11 @@ class TestInstanceClaim(BaseTestCase): self.assertTrue(obj_base.obj_equal_prims(expected, cn)) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') - def test_update_usage_removed(self, migr_mock, pci_mock, check_bfv_mock): + def test_update_usage_removed(self, migr_mock, check_bfv_mock): # Test that RT.update_usage() removes the instance when update is # called in a removed state - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False cn = self.rt.compute_nodes[_NODENAME] allocations = { @@ -2067,10 +2064,9 @@ class TestInstanceClaim(BaseTestCase): self.assertEqual(0, len(self.rt.assigned_resources[cn.uuid][rc])) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') - def test_claim(self, migr_mock, pci_mock, check_bfv_mock): - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + def test_claim(self, migr_mock, check_bfv_mock): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False disk_used = self.instance.root_gb + self.instance.ephemeral_gb @@ -2108,9 +2104,8 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.utils.is_volume_backed_instance') @mock.patch('nova.pci.stats.PciDeviceStats.support_requests', return_value=True) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') - def test_claim_with_pci(self, migr_mock, pci_mock, pci_stats_mock, + def test_claim_with_pci(self, migr_mock, pci_stats_mock, check_bfv_mock): # Test that a claim involving PCI requests correctly claims # PCI devices on the host and sends an updated pci_device_pools @@ -2130,7 +2125,7 @@ class TestInstanceClaim(BaseTestCase): pci_requests = objects.InstancePCIRequests( requests=[request], instance_uuid=self.instance.uuid) - pci_mock.return_value = pci_requests + self.instance.pci_requests = pci_requests check_bfv_mock.return_value = False disk_used = self.instance.root_gb + self.instance.ephemeral_gb @@ -2164,10 +2159,8 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.utils.is_volume_backed_instance', new=mock.Mock(return_value=False)) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - new=mock.Mock(return_value=objects.InstancePCIRequests( - requests=[]))) def test_claim_with_resources(self): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) cn = self.rt.compute_nodes[_NODENAME] allocations = { cn.uuid: { @@ -2192,10 +2185,8 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.utils.is_volume_backed_instance', new=mock.Mock(return_value=False)) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - new=mock.Mock(return_value=objects.InstancePCIRequests( - requests=[]))) def test_claim_with_resources_from_free(self): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) cn = self.rt.compute_nodes[_NODENAME] self.rt.assigned_resources = { self.resource_1.provider_uuid: { @@ -2222,10 +2213,8 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.utils.is_volume_backed_instance', new=mock.Mock(return_value=False)) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - new=mock.Mock(return_value=objects.InstancePCIRequests( - requests=[]))) def test_claim_failed_with_resources(self): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) cn = self.rt.compute_nodes[_NODENAME] # Only one "CUSTOM_RESOURCE_0" resource is available allocations = { @@ -2248,12 +2237,11 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.resource_tracker.ResourceTracker.' '_sync_compute_service_disabled_trait', new=mock.Mock()) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') @mock.patch('nova.objects.ComputeNode.save') - def test_claim_abort_context_manager(self, save_mock, migr_mock, pci_mock, + def test_claim_abort_context_manager(self, save_mock, migr_mock, check_bfv_mock): - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False cn = self.rt.compute_nodes[_NODENAME] @@ -2293,11 +2281,10 @@ class TestInstanceClaim(BaseTestCase): @mock.patch('nova.compute.resource_tracker.ResourceTracker.' '_sync_compute_service_disabled_trait', new=mock.Mock()) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') @mock.patch('nova.objects.ComputeNode.save') - def test_claim_abort(self, save_mock, migr_mock, pci_mock, check_bfv_mock): - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + def test_claim_abort(self, save_mock, migr_mock, check_bfv_mock): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False disk_used = self.instance.root_gb + self.instance.ephemeral_gb @@ -2336,11 +2323,10 @@ class TestInstanceClaim(BaseTestCase): self.assertEqual(0, cn.running_vms) @mock.patch('nova.compute.utils.is_volume_backed_instance') - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') @mock.patch('nova.objects.ComputeNode.save') - def test_claim_numa(self, save_mock, migr_mock, pci_mock, check_bfv_mock): - pci_mock.return_value = objects.InstancePCIRequests(requests=[]) + def test_claim_numa(self, save_mock, migr_mock, check_bfv_mock): + self.instance.pci_requests = objects.InstancePCIRequests(requests=[]) check_bfv_mock.return_value = False cn = self.rt.compute_nodes[_NODENAME] @@ -2472,8 +2458,6 @@ class TestResize(BaseTestCase): '_sync_compute_service_disabled_trait', new=mock.Mock()) @mock.patch('nova.compute.utils.is_volume_backed_instance', return_value=False) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - return_value=objects.InstancePCIRequests(requests=[])) @mock.patch('nova.objects.InstancePCIRequests.get_by_instance', return_value=objects.InstancePCIRequests(requests=[])) @mock.patch('nova.objects.PciDeviceList.get_by_compute_node', @@ -2492,7 +2476,6 @@ class TestResize(BaseTestCase): get_by_host_and_nodename_mock, pci_get_by_compute_node_mock, pci_get_by_instance_mock, - pci_get_by_instance_uuid_mock, is_bfv_mock, revert=False): @@ -2513,6 +2496,7 @@ class TestResize(BaseTestCase): instance = _INSTANCE_FIXTURES[0].obj_clone() old_flavor = instance.flavor instance.new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2] + instance.pci_requests = objects.InstancePCIRequests(requests=[]) # allocations for create allocations = {