Support live migration with qos ports

This patch enhances the live_migration task in the conductor to support
qos ports during live migration. The high-level sequence of events is
as follows:
* when the request spec is gathered before the scheduler call, the
  resource requests are collected from the neutron ports and the request
  spec is updated
* after successful scheduling, the request group to resource provider
  mapping is calculated
* the instance PCI requests are updated to drive the PCI claim on the
  target host to allocate VFs from the same PCI PF the bandwidth is
  allocated from
* the inactive port binding on the target host is updated to carry the RP
  UUID in the allocation key, according to the resource allocation on the
  destination host (a condensed sketch of this flow is shown below)
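
A condensed sketch of this conductor-side flow (simplified from
LiveMigrationTask in nova/conductor/tasks/live_migrate.py; the method
name _execute_sketch is illustrative, and `selection` stands for the
host chosen by the scheduler):

    def _execute_sketch(self):
        # 1) collect the resource requests of the neutron ports and put
        #    them on the request spec before calling the scheduler
        self.request_spec.requested_resources = (
            self.network_api.get_requested_resource_for_instance(
                self.context, self.instance.uuid))

        # 2) after successful scheduling, calculate the request group to
        #    resource provider mapping from the scheduler's selection
        scheduler_utils.fill_provider_mapping(self.request_spec, selection)
        provider_mapping = self.request_spec.get_request_group_mapping()

        # 3) update the instance PCI requests so the PCI claim on the
        #    target host allocates VFs from the PF the bandwidth is
        #    allocated from
        if provider_mapping:
            compute_utils.\
                update_pci_request_spec_with_allocated_interface_name(
                    self.context, self.report_client, self.instance,
                    provider_mapping)

        # 4) update the inactive port binding on the target host so its
        #    binding profile carries the RP UUID in the 'allocation' key
        self._bind_ports_on_destination(self.destination, provider_mapping)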

As the port binding is already updated in the conductor, the late check
for the allocation key in the binding profile is turned off for live
migration in the neutronv2 API.
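
For illustration, after the conductor updates the inactive binding, the
binding profile of a port with a resource request would carry the
allocation key (the values below are made-up placeholders; pci_slot
appears only for SR-IOV ports):

    binding_profile = {
        'pci_slot': '0000:04:10.1',
        # UUID of the resource provider the port bandwidth is allocated
        # from on the destination host
        'allocation': '<destination-rp-uuid>',
    }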

Note that this patch only handles live migration without a target host;
subsequent patches will add support for migration with a target host and
other edge cases like reschedule.

blueprint: support-move-ops-with-qos-ports-ussuri

Change-Id: Ibb84ea210795634f02997d4627e0beec5fea36ee
Author: Balazs Gibizer
Date:   2019-11-25 15:03:10 +01:00
parent b96bc9d675
commit bd8e2fe9c8

6 changed files with 273 additions and 29 deletions
@@ -8508,7 +8508,6 @@ class ComputeManager(manager.Manager):
         migration = objects.Migration(source_compute=instance.host,
                                       dest_compute=self.host,
                                       migration_type='live-migration')
-        # TODO(gibi): calculate and pass resource_provider_mapping
         self.network_api.migrate_instance_finish(
             context, instance, migration, provider_mappings=None)
@@ -257,7 +257,12 @@ class LiveMigrationTask(base.TaskBase):
         self._check_destination_has_enough_memory()
         source_node, dest_node = self._check_compatible_with_source_hypervisor(
             self.destination)
-        self._call_livem_checks_on_host(self.destination)
+        # NOTE(gibi): This code path is used when the live migration is
+        # forced to a target host, skipping the scheduler. Such an operation
+        # is rejected for servers with nested resource allocations since
+        # I7cbd5d9fb875ebf72995362e0b6693492ce32051. So here we can safely
+        # assume that the provider mapping is empty.
+        self._call_livem_checks_on_host(self.destination, {})
         # Make sure the forced destination host is in the same cell that the
         # instance currently lives in.
         # NOTE(mriedem): This can go away if/when the forced destination host
@@ -316,7 +321,7 @@ class LiveMigrationTask(base.TaskBase):
             raise exception.DestinationHypervisorTooOld()
         return source_info, destination_info
 
-    def _call_livem_checks_on_host(self, destination):
+    def _call_livem_checks_on_host(self, destination, provider_mapping):
         self._check_can_migrate_pci(self.source, destination)
         try:
             self.migrate_data = self.compute_rpcapi.\
@@ -338,11 +343,25 @@ class LiveMigrationTask(base.TaskBase):
             self.migrate_data.vifs = migrate_data_obj.VIFMigrateData.\
                 create_skeleton_migrate_vifs(
                     self.instance.get_network_info())
-            bindings = self._bind_ports_on_destination(destination)
+            bindings = self._bind_ports_on_destination(
+                destination, provider_mapping)
             self._update_migrate_vifs_from_bindings(self.migrate_data.vifs,
                                                     bindings)
 
-    def _bind_ports_on_destination(self, destination):
+    @staticmethod
+    def _get_port_profile_from_provider_mapping(port_id, provider_mappings):
+        if port_id in provider_mappings:
+            # NOTE(gibi): In the resource provider mapping there can be
+            # more than one RP fulfilling a request group. But the resource
+            # request of a Neutron port is always mapped to a numbered
+            # request group that is always fulfilled by one resource
+            # provider. So we only pass that single RP UUID here.
+            return {'allocation': provider_mappings[port_id][0]}
+        else:
+            return {}
+
+    def _bind_ports_on_destination(self, destination, provider_mappings):
         LOG.debug('Start binding ports on destination host: %s', destination,
                   instance=self.instance)
         # Bind ports on the destination host; returns a dict, keyed by
@@ -355,15 +374,18 @@ class LiveMigrationTask(base.TaskBase):
         # if that is the case, it may have updated the required port
         # profile for the destination node (e.g new PCI address if SR-IOV)
         # perform port binding against the requested profile
-        migrate_vifs_with_profile = [mig_vif for mig_vif in
-                                     self.migrate_data.vifs
-                                     if 'profile_json' in mig_vif]
-
-        ports_profile = None
-        if migrate_vifs_with_profile:
-            # Update to the port profile is required
-            ports_profile = {mig_vif.port_id: mig_vif.profile
-                             for mig_vif in migrate_vifs_with_profile}
+        ports_profile = {}
+        for mig_vif in self.migrate_data.vifs:
+            profile = mig_vif.profile if 'profile_json' in mig_vif else {}
+
+            # NOTE(gibi): provider_mappings also contribute to the
+            # binding profile of the ports if the port has a resource
+            # request. So we need to merge the profile information from
+            # both sources.
+            profile.update(
+                self._get_port_profile_from_provider_mapping(
+                    mig_vif.port_id, provider_mappings))
+
+            if profile:
+                ports_profile[mig_vif.port_id] = profile
 
         bindings = self.network_api.bind_ports_to_host(
             context=self.context, instance=self.instance, host=destination,
@@ -425,8 +447,15 @@ class LiveMigrationTask(base.TaskBase):
         # is not forced to be the original host
         request_spec.reset_forced_destinations()
-        # TODO(gibi): We need to make sure that the requested_resources field
-        # is re calculated based on neutron ports.
+        port_res_req = (
+            self.network_api.get_requested_resource_for_instance(
+                self.context, self.instance.uuid))
+        # NOTE(gibi): When cyborg or another module wants to handle
+        # similar non-nova resources, we have to collect all the
+        # external resource requests in a single list here and add
+        # them to the RequestSpec.
+        request_spec.requested_resources = port_res_req
 
         scheduler_utils.setup_instance_group(self.context, request_spec)
         # We currently only support live migrating to hosts in the same
@@ -476,9 +505,19 @@ class LiveMigrationTask(base.TaskBase):
                 # ex.exc_type.
                 raise exception.MigrationSchedulerRPCError(
                     reason=six.text_type(ex))
+
+            scheduler_utils.fill_provider_mapping(request_spec, selection)
+
+            provider_mapping = request_spec.get_request_group_mapping()
+
+            if provider_mapping:
+                compute_utils.\
+                    update_pci_request_spec_with_allocated_interface_name(
+                        self.context, self.report_client, self.instance,
+                        provider_mapping)
+
             try:
                 self._check_compatible_with_source_hypervisor(host)
-                self._call_livem_checks_on_host(host)
+                self._call_livem_checks_on_host(host, provider_mapping)
             except (exception.Invalid, exception.MigrationPreCheckError) as e:
                 LOG.debug("Skipping host: %(host)s because: %(e)s",
                           {"host": host, "e": e})
@@ -1260,10 +1260,6 @@ class API(base_api.NetworkAPI):
         client = _get_ksa_client(context, admin=True)
 
-        # TODO(gibi): To support ports with resource request during server
-        # live migrate operation we need to take care of 'allocation' key in
-        # the binding profile per binding.
-
         bindings_by_port_id = {}
         for vif in network_info:
             # Now bind each port to the destination host and keep track of each
@@ -3399,7 +3395,10 @@ class API(base_api.NetworkAPI):
                     reason=_("Unable to correlate PCI slot %s") %
                            pci_slot)
 
-            if p.get('resource_request'):
+            # NOTE(gibi): during live migration the conductor already sets
+            # the allocation key in the port binding
+            if (p.get('resource_request') and
+                    migration['migration_type'] != constants.LIVE_MIGRATION):
                 if not provider_mappings:
                     # TODO(gibi): Remove this check when compute RPC API is
                     # bumped to 6.0
@@ -7237,6 +7237,63 @@ class ServerMoveWithPortResourceRequestTest(
             server, self.compute1_rp_uuid, non_qos_port, qos_port,
             qos_sriov_port, self.flavor_with_group_policy)
 
+    def _turn_off_api_check(self):
+        # The API actively rejects move operations for servers with a
+        # resource request, so we have to turn off that check.
+        # TODO(gibi): Remove this when the move operations are supported and
+        # the API check is removed.
+        patcher = mock.patch(
+            'nova.api.openstack.common.'
+            'supports_port_resource_request_during_move',
+            return_value=True)
+        self.addCleanup(patcher.stop)
+        patcher.start()
+
+    def test_live_migrate_with_qos_port(self):
+        # TODO(gibi): remove this when live migration is fully supported and
+        # therefore the check is removed from the api
+        self._turn_off_api_check()
+
+        non_qos_normal_port = self.neutron.port_1
+        qos_normal_port = self.neutron.port_with_resource_request
+        qos_sriov_port = self.neutron.port_with_sriov_resource_request
+
+        server = self._create_server_with_ports(
+            non_qos_normal_port, qos_normal_port, qos_sriov_port)
+
+        # check that the server allocates from the current host properly
+        self._check_allocation(
+            server, self.compute1_rp_uuid, non_qos_normal_port,
+            qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
+
+        self.api.post_server_action(
+            server['id'],
+            {
+                'os-migrateLive': {
+                    'host': None,
+                    'block_migration': 'auto'
+                }
+            }
+        )
+
+        self._wait_for_server_parameter(
+            server,
+            {'OS-EXT-SRV-ATTR:host': 'host2',
+             'status': 'ACTIVE'})
+
+        self._check_allocation(
+            server, self.compute2_rp_uuid, non_qos_normal_port,
+            qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
+
+        self._delete_server_and_check_allocations(
+            server, qos_normal_port, qos_sriov_port)
+
+    # TODO(gibi): add tests for live migration cases:
+    # * with target host
+    # * re-schedule success
+    # * re-schedule fail -> pci request cleanup?
+    # * abort / cancel -> dest / pci request cleanup?
+
 
 class PortResourceRequestReSchedulingTest(
         PortResourceRequestBasedSchedulingTestBase):
@@ -12,6 +12,7 @@
 import mock
 import oslo_messaging as messaging
+from oslo_serialization import jsonutils
 from oslo_utils.fixture import uuidsentinel as uuids
 import six
@@ -74,6 +75,13 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
         self.ensure_network_metadata_mock = _p.start()
         self.addCleanup(_p.stop)
 
+        _p = mock.patch(
+            'nova.network.neutronv2.api.API.'
+            'get_requested_resource_for_instance',
+            return_value=[])
+        self.mock_get_res_req = _p.start()
+        self.addCleanup(_p.stop)
+
     def _generate_task(self):
         self.task = live_migrate.LiveMigrationTask(self.context,
             self.instance, self.destination, self.block_migration,
@@ -438,7 +446,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
         mock_select.assert_called_once_with(self.context, self.fake_spec,
             [self.instance.uuid], return_objects=True, return_alternates=False)
         mock_check.assert_called_once_with('host1')
-        mock_call.assert_called_once_with('host1')
+        mock_call.assert_called_once_with('host1', {})
 
     @mock.patch.object(live_migrate.LiveMigrationTask,
                        '_call_livem_checks_on_host')
@@ -459,7 +467,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
             self.context, self.fake_spec, [self.instance.uuid],
             return_objects=True, return_alternates=False)
         mock_check.assert_called_once_with('host1')
-        mock_call.assert_called_once_with('host1')
+        mock_call.assert_called_once_with('host1', {})
 
     @mock.patch.object(live_migrate.LiveMigrationTask,
                        '_remove_host_allocations')
@@ -486,7 +494,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
             mock.call(self.context, self.fake_spec, [self.instance.uuid],
                       return_objects=True, return_alternates=False)])
         mock_check.assert_has_calls([mock.call('host1'), mock.call('host2')])
-        mock_call.assert_called_once_with('host2')
+        mock_call.assert_called_once_with('host2', {})
 
     def test_find_destination_retry_with_old_hypervisor(self):
         self._test_find_destination_retry_hypervisor_raises(
@@ -521,7 +529,8 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
             mock.call(self.context, self.fake_spec, [self.instance.uuid],
                       return_objects=True, return_alternates=False)])
         mock_check.assert_has_calls([mock.call('host1'), mock.call('host2')])
-        mock_call.assert_has_calls([mock.call('host1'), mock.call('host2')])
+        mock_call.assert_has_calls(
+            [mock.call('host1', {}), mock.call('host2', {})])
 
     @mock.patch.object(live_migrate.LiveMigrationTask,
                        '_remove_host_allocations')
@@ -549,7 +558,8 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
             mock.call(self.context, self.fake_spec, [self.instance.uuid],
                       return_objects=True, return_alternates=False)])
         mock_check.assert_has_calls([mock.call('host1'), mock.call('host2')])
-        mock_call.assert_has_calls([mock.call('host1'), mock.call('host2')])
+        mock_call.assert_has_calls(
+            [mock.call('host1', {}), mock.call('host2', {})])
 
     @mock.patch.object(objects.Migration, 'save')
     @mock.patch.object(live_migrate.LiveMigrationTask,
@@ -612,7 +622,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
                                   side_effect=messaging.MessagingTimeout),
                 mock.patch.object(self.task, '_check_can_migrate_pci')):
             self.assertRaises(exception.MigrationPreCheckError,
-                              self.task._call_livem_checks_on_host, {})
+                              self.task._call_livem_checks_on_host, {}, {})
 
     def test_call_livem_checks_on_host_bind_ports(self):
         data = objects.LibvirtLiveMigrateData()
@@ -639,7 +649,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
                 network_model.VIF(uuids.port2)])
         self.instance.info_cache = objects.InstanceInfoCache(
             network_info=nwinfo)
-        self.task._call_livem_checks_on_host('dest-host')
+        self.task._call_livem_checks_on_host('dest-host', {})
         # Assert the migrate_data set on the task based on the port
         # bindings created.
         self.assertIn('vifs', data)
@@ -651,6 +661,118 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
         _test()
 
+    @mock.patch('nova.network.neutronv2.api.API.bind_ports_to_host')
+    def test_bind_ports_on_destination_merges_profiles(self, mock_bind_ports):
+        """Assert that if both the migration_data and the provider mapping
+        contain binding profile related information then such information is
+        merged in the resulting profile.
+        """
+        self.task.migrate_data = objects.LibvirtLiveMigrateData(
+            vifs=[
+                objects.VIFMigrateData(
+                    port_id=uuids.port1,
+                    profile_json=jsonutils.dumps(
+                        {'some-key': 'value'}))
+            ])
+        provider_mappings = {uuids.port1: [uuids.dest_bw_rp]}
+
+        self.task._bind_ports_on_destination('dest-host', provider_mappings)
+
+        mock_bind_ports.assert_called_once_with(
+            context=self.context, instance=self.instance, host='dest-host',
+            vnic_types=None,
+            port_profiles={uuids.port1: {'allocation': uuids.dest_bw_rp,
+                                         'some-key': 'value'}})
+
+    @mock.patch('nova.network.neutronv2.api.API.bind_ports_to_host')
+    def test_bind_ports_on_destination_migration_data(self, mock_bind_ports):
+        """Assert that if only the migration_data contains binding profile
+        related information then that is sent to neutron.
+        """
+        self.task.migrate_data = objects.LibvirtLiveMigrateData(
+            vifs=[
+                objects.VIFMigrateData(
+                    port_id=uuids.port1,
+                    profile_json=jsonutils.dumps(
+                        {'some-key': 'value'}))
+            ])
+        provider_mappings = {}
+
+        self.task._bind_ports_on_destination('dest-host', provider_mappings)
+
+        mock_bind_ports.assert_called_once_with(
+            context=self.context, instance=self.instance, host='dest-host',
+            vnic_types=None,
+            port_profiles={uuids.port1: {'some-key': 'value'}})
+
+    @mock.patch('nova.network.neutronv2.api.API.bind_ports_to_host')
+    def test_bind_ports_on_destination_provider_mapping(
+            self, mock_bind_ports):
+        """Assert that if only the provider mapping contains binding
+        profile related information then that is sent to neutron.
+        """
+        self.task.migrate_data = objects.LibvirtLiveMigrateData(
+            vifs=[
+                objects.VIFMigrateData(
+                    port_id=uuids.port1)
+            ])
+        provider_mappings = {uuids.port1: [uuids.dest_bw_rp]}
+
+        self.task._bind_ports_on_destination('dest-host', provider_mappings)
+
+        mock_bind_ports.assert_called_once_with(
+            context=self.context, instance=self.instance, host='dest-host',
+            vnic_types=None,
+            port_profiles={uuids.port1: {'allocation': uuids.dest_bw_rp}})
+
+    @mock.patch(
+        'nova.compute.utils.'
+        'update_pci_request_spec_with_allocated_interface_name')
+    @mock.patch('nova.scheduler.utils.fill_provider_mapping')
+    @mock.patch.object(live_migrate.LiveMigrationTask,
+                       '_call_livem_checks_on_host')
+    @mock.patch.object(live_migrate.LiveMigrationTask,
+                       '_check_compatible_with_source_hypervisor')
+    @mock.patch.object(query.SchedulerQueryClient, 'select_destinations',
+                       return_value=[[fake_selection1]])
+    @mock.patch.object(objects.RequestSpec, 'reset_forced_destinations')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    def test_find_destination_with_resource_request(
+            self, mock_setup, mock_reset, mock_select, mock_check, mock_call,
+            mock_fill_provider_mapping, mock_update_pci_req):
+        resource_req = [objects.RequestGroup(requester_id=uuids.port_id)]
+        self.mock_get_res_req.return_value = resource_req
+
+        self.assertEqual(("host1", "node1", fake_limits1),
+                         self.task._find_destination())
+
+        # Make sure the request_spec was updated to include the cell
+        # mapping.
+        self.assertIsNotNone(self.fake_spec.requested_destination.cell)
+        # Make sure the spec was updated to include the project_id.
+        self.assertEqual(self.fake_spec.project_id, self.instance.project_id)
+        # Make sure that requested_resources are added to the request spec
+        self.assertEqual(
+            resource_req, self.task.request_spec.requested_resources)
+
+        mock_setup.assert_called_once_with(self.context, self.fake_spec)
+        mock_reset.assert_called_once_with()
+        self.ensure_network_metadata_mock.assert_called_once_with(
+            self.instance)
+        self.heal_reqspec_is_bfv_mock.assert_called_once_with(
+            self.context, self.fake_spec, self.instance)
+        mock_select.assert_called_once_with(self.context, self.fake_spec,
+            [self.instance.uuid], return_objects=True, return_alternates=False)
+        mock_check.assert_called_once_with('host1')
+        mock_call.assert_called_once_with('host1', {uuids.port_id: []})
+        mock_fill_provider_mapping.assert_called_once_with(
+            self.task.request_spec, fake_selection1)
+        mock_update_pci_req.assert_called_once_with(
+            self.context, self.task.report_client, self.instance,
+            {uuids.port_id: []})
+
     @mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid',
                        side_effect=exception.InstanceMappingNotFound(
                            uuid=uuids.instance))
@@ -4658,6 +4658,34 @@ class TestNeutronv2(TestNeutronv2Base):
             "are required for ports with a resource request.",
             six.text_type(ex))
 
+    @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock())
+    def test_update_port_bindings_for_instance_with_resource_req_live_mig(
+            self, get_client_mock):
+        instance = fake_instance.fake_instance_obj(self.context)
+        fake_ports = {'ports': [
+            {'id': 'fake-port-1',
+             'binding:vnic_type': 'normal',
+             constants.BINDING_HOST_ID: 'old-host',
+             constants.BINDING_PROFILE:
+                 {'allocation': uuids.dest_compute_rp},
+             'resource_request': mock.sentinel.resource_request}]}
+        migration = objects.Migration(
+            status='confirmed', migration_type='live-migration')
+        list_ports_mock = mock.Mock(return_value=fake_ports)
+        get_client_mock.return_value.list_ports = list_ports_mock
+
+        # No mapping is passed in as during live migration the conductor
+        # already created the binding and added the allocation key
+        self.api._update_port_binding_for_instance(
+            self.context, instance, 'new-host', migration, {})
+
+        # Note that binding:profile is not updated
+        get_client_mock.return_value.update_port.assert_called_once_with(
+            'fake-port-1',
+            {'port': {'device_owner': 'compute:None',
+                      'binding:host_id': 'new-host'}})
+
     def test_get_pci_mapping_for_migration(self):
         instance = fake_instance.fake_instance_obj(self.context)
         instance.migration_context = objects.MigrationContext()